diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..1848e1f1b6ebc124eeca8b2c10b67a4a343491ad
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,53 @@
+FROM nvcr.io/nvidia/tensorrt:21.09-py3
+
+ENV DEBIAN_FRONTEND=noninteractive
+ARG USERNAME=user
+ARG WORKDIR=/workspace/ByteTrack
+
+RUN apt-get update && apt-get install -y \
+        automake autoconf libpng-dev nano python3-pip \
+        curl zip unzip libtool swig zlib1g-dev pkg-config \
+        python3-mock libpython3-dev libpython3-all-dev \
+        g++ gcc cmake make pciutils cpio gosu wget \
+        libgtk-3-dev libxtst-dev sudo apt-transport-https \
+        build-essential gnupg git xz-utils vim \
+        libva-drm2 libva-x11-2 vainfo libva-wayland2 libva-glx2 \
+        libva-dev libdrm-dev xorg xorg-dev protobuf-compiler \
+        openbox libx11-dev libgl1-mesa-glx libgl1-mesa-dev \
+        libtbb2 libtbb-dev libopenblas-dev libopenmpi-dev \
+    && sed -i 's/# set linenumbers/set linenumbers/g' /etc/nanorc \
+    && apt clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN git clone https://github.com/ifzhang/ByteTrack \
+    && cd ByteTrack \
+    && git checkout 3434c5e8bc6a5ae8ad530528ba8d9a431967f237 \
+    && mkdir -p YOLOX_outputs/yolox_x_mix_det/track_vis \
+    && sed -i 's/torch>=1.7/torch==1.9.1+cu111/g' requirements.txt \
+    && sed -i 's/torchvision==0.10.0/torchvision==0.10.1+cu111/g' requirements.txt \
+    && sed -i "s/'cuda'/0/g" tools/demo_track.py \
+    && pip3 install pip --upgrade \
+    && pip3 install -r requirements.txt -f https://download.pytorch.org/whl/torch_stable.html \
+    && python3 setup.py develop \
+    && pip3 install cython \
+    && pip3 install 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI' \
+    && pip3 install cython_bbox gdown \
+    && ldconfig \
+    && pip cache purge
+
+RUN git clone https://github.com/NVIDIA-AI-IOT/torch2trt \
+    && cd torch2trt \
+    && git checkout 0400b38123d01cc845364870bdf0a0044ea2b3b2 \
+    # https://github.com/NVIDIA-AI-IOT/torch2trt/issues/619
+    && wget https://github.com/NVIDIA-AI-IOT/torch2trt/commit/8b9fb46ddbe99c2ddf3f1ed148c97435cbeb8fd3.patch \
+    && git apply 8b9fb46ddbe99c2ddf3f1ed148c97435cbeb8fd3.patch \
+    && python3 setup.py install
+
+RUN echo "root:root" | chpasswd \
+    && adduser --disabled-password --gecos "" "${USERNAME}" \
+    && echo "${USERNAME}:${USERNAME}" | chpasswd \
+    && echo "%${USERNAME} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers.d/${USERNAME} \
+    && chmod 0440 /etc/sudoers.d/${USERNAME}
+USER ${USERNAME}
+RUN sudo chown -R ${USERNAME}:${USERNAME} ${WORKDIR}
+WORKDIR ${WORKDIR}
\ No newline at end of file
diff --git a/datasets/data_path/citypersons.train b/datasets/data_path/citypersons.train
new file mode 100644
index 0000000000000000000000000000000000000000..0dc55728e07028ae42f53cedeb91419c6babd54b
--- /dev/null
+++ b/datasets/data_path/citypersons.train
@@ -0,0 +1,2500 @@
+Cityscapes/images/train/jena/jena_000078_000019_leftImg8bit.png
+Cityscapes/images/train/jena/jena_000032_000019_leftImg8bit.png
+Cityscapes/images/train/jena/jena_000055_000019_leftImg8bit.png
+Cityscapes/images/train/jena/jena_000067_000019_leftImg8bit.png
+Cityscapes/images/train/jena/jena_000111_000019_leftImg8bit.png
+Cityscapes/images/train/jena/jena_000105_000019_leftImg8bit.png
+Cityscapes/images/train/jena/jena_000021_000019_leftImg8bit.png
+Cityscapes/images/train/jena/jena_000045_000019_leftImg8bit.png
+Cityscapes/images/train/jena/jena_000058_000019_leftImg8bit.png
+Cityscapes/images/train/jena/jena_000096_000019_leftImg8bit.png
+Cityscapes/images/train/jena/jena_000042_000019_leftImg8bit.png
+Cityscapes/images/train/jena/jena_000109_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000065_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000068_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000052_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000051_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000091_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000037_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000050_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000092_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000101_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000081_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000074_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000040_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000030_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000059_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000100_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000034_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000089_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000104_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000080_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000082_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000044_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000026_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000115_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000023_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000095_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000102_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000094_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000010_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000009_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000070_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000039_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000079_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000071_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000018_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000008_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000076_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000090_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000085_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000116_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000049_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000022_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000118_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000004_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000087_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000047_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000007_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000033_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000097_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000028_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000086_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000073_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000056_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000069_000019_leftImg8bit.png 
+Cityscapes/images/train/jena/jena_000088_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000103_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000057_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000038_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000061_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000048_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000060_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000027_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000031_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000072_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000043_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000054_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000083_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000077_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000108_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000106_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000084_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000075_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000024_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000110_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000029_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000000_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000117_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000036_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000046_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000062_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000035_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000019_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000112_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000025_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000013_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000113_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000053_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000098_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000066_000019_leftImg8bit.png +Cityscapes/images/train/jena/jena_000041_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000110_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000107_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000087_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000188_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000175_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000147_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000043_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000042_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000191_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000189_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000007_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000112_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000032_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000085_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000077_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000144_000019_leftImg8bit.png 
+Cityscapes/images/train/stuttgart/stuttgart_000161_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000149_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000017_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000010_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000067_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000178_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000070_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000026_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000056_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000171_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000157_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000100_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000158_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000125_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000134_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000168_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000156_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000162_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000024_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000037_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000094_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000129_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000069_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000011_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000095_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000002_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000160_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000111_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000039_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000184_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000165_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000046_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000065_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000022_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000135_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000030_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000133_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000172_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000105_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000014_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000142_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000079_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000183_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000173_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000028_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000083_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000071_000019_leftImg8bit.png 
+Cityscapes/images/train/stuttgart/stuttgart_000106_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000169_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000126_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000019_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000148_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000078_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000023_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000108_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000153_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000033_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000088_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000027_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000016_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000181_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000044_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000182_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000140_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000186_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000090_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000099_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000124_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000176_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000009_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000008_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000004_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000048_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000020_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000093_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000152_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000155_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000138_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000136_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000103_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000145_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000163_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000101_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000005_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000029_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000063_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000139_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000150_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000123_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000064_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000190_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000092_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000091_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000130_000019_leftImg8bit.png 
+Cityscapes/images/train/stuttgart/stuttgart_000164_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000170_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000131_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000031_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000159_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000054_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000167_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000012_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000104_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000021_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000053_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000058_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000001_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000179_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000066_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000034_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000050_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000013_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000068_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000137_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000086_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000187_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000080_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000097_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000192_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000132_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000015_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000025_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000096_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000059_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000102_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000174_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000166_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000180_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000185_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000098_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000141_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000151_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000006_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000047_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000146_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000072_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000073_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000089_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000055_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000193_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000018_000019_leftImg8bit.png 
+Cityscapes/images/train/stuttgart/stuttgart_000109_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000076_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000074_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000177_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000127_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000121_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000084_000019_leftImg8bit.png +Cityscapes/images/train/stuttgart/stuttgart_000061_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000019_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000063_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000066_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000037_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000091_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000004_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000050_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000092_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000079_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000020_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000071_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000027_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000068_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000002_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000084_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000003_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000013_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000000_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000069_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000065_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000077_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000017_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000012_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000074_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000055_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000072_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000093_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000018_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000028_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000075_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000044_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000043_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000062_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000059_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000052_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000082_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000053_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000006_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000087_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000024_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000007_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000070_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000080_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000057_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000085_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000011_000019_leftImg8bit.png 
+Cityscapes/images/train/ulm/ulm_000064_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000005_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000067_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000060_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000040_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000010_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000073_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000023_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000031_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000089_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000039_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000016_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000058_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000047_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000030_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000078_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000041_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000086_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000051_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000014_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000056_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000015_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000008_000019_leftImg8bit.png +Cityscapes/images/train/ulm/ulm_000049_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000114_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000099_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000014_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000204_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000205_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000055_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000052_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000162_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000126_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000118_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000159_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000024_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000077_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000092_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000131_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000021_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000155_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000062_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000179_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000151_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000012_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000160_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000038_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000139_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000152_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000167_000019_leftImg8bit.png 
+Cityscapes/images/train/dusseldorf/dusseldorf_000031_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000081_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000111_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000149_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000108_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000202_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000036_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000170_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000174_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000005_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000173_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000008_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000163_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000022_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000056_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000019_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000140_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000175_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000088_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000078_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000135_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000107_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000124_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000083_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000029_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000009_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000156_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000000_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000143_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000066_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000192_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000082_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000057_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000119_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000079_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000067_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000068_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000172_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000073_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000030_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000037_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000063_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000069_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000209_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000070_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000169_000019_leftImg8bit.png 
+Cityscapes/images/train/dusseldorf/dusseldorf_000178_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000150_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000032_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000080_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000120_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000018_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000176_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000115_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000110_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000026_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000041_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000185_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000189_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000075_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000044_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000071_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000002_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000094_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000093_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000203_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000121_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000136_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000117_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000051_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000193_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000089_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000147_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000013_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000165_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000201_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000206_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000157_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000197_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000153_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000113_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000198_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000164_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000122_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000064_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000123_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000208_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000010_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000134_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000090_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000199_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000187_000019_leftImg8bit.png 
+Cityscapes/images/train/dusseldorf/dusseldorf_000015_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000154_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000035_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000180_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000074_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000025_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000086_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000210_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000195_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000040_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000065_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000200_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000076_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000096_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000158_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000023_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000145_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000072_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000104_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000060_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000125_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000027_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000098_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000211_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000138_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000061_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000190_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000084_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000034_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000045_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000137_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000181_000019_leftImg8bit.png +Cityscapes/images/train/dusseldorf/dusseldorf_000059_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000050_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000071_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000073_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000019_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000046_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000083_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000036_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000001_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000062_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000034_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000044_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000068_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000074_000019_leftImg8bit.png 
+Cityscapes/images/train/darmstadt/darmstadt_000075_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000055_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000025_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000029_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000082_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000006_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000053_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000043_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000016_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000031_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000037_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000064_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000048_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000008_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000010_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000078_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000042_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000084_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000045_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000070_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000033_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000059_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000035_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000054_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000018_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000080_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000026_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000030_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000028_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000056_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000024_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000066_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000049_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000003_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000058_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000081_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000002_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000051_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000041_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000023_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000021_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000022_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000007_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000000_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000047_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000067_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000015_000019_leftImg8bit.png 
+Cityscapes/images/train/darmstadt/darmstadt_000027_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000061_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000052_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000032_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000017_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000060_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000069_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000076_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000065_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000063_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000072_000019_leftImg8bit.png +Cityscapes/images/train/darmstadt/darmstadt_000079_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000040_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000074_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000103_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000089_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000037_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000019_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000101_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000072_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000008_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000051_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000015_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000020_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000114_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000062_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000115_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000070_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000069_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000081_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000005_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000119_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000079_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000096_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000110_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000117_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000054_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000002_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000042_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000012_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000085_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000045_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000077_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000084_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000031_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000034_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000011_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000021_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000022_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000073_000019_leftImg8bit.png 
+Cityscapes/images/train/zurich/zurich_000111_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000113_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000090_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000053_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000006_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000120_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000030_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000088_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000010_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000055_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000038_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000035_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000004_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000083_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000016_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000102_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000013_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000066_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000064_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000001_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000099_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000078_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000118_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000067_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000033_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000121_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000028_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000094_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000068_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000014_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000080_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000109_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000023_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000105_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000025_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000024_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000061_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000009_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000060_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000075_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000087_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000027_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000104_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000106_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000000_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000071_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000007_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000065_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000003_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000058_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000018_000019_leftImg8bit.png 
+Cityscapes/images/train/zurich/zurich_000057_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000086_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000063_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000032_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000092_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000052_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000039_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000056_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000043_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000026_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000059_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000095_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000041_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000017_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000076_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000044_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000116_000019_leftImg8bit.png +Cityscapes/images/train/zurich/zurich_000029_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000193_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000165_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000223_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000032_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000280_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000175_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000076_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000295_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000002_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000084_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000009_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000085_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000244_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000299_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000099_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000206_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000110_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000090_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000228_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000214_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000078_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000037_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000016_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000083_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000144_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000158_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000057_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000105_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000097_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000207_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000140_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000208_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000047_000019_leftImg8bit.png 
+Cityscapes/images/train/bremen/bremen_000253_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000260_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000087_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000026_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000044_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000168_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000136_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000176_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000292_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000235_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000311_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000014_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000191_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000242_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000029_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000104_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000170_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000285_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000224_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000067_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000288_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000131_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000286_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000298_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000036_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000213_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000103_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000255_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000275_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000065_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000094_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000204_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000064_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000171_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000252_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000148_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000287_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000290_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000120_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000221_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000056_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000315_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000205_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000048_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000222_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000313_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000259_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000226_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000186_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000030_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000243_000019_leftImg8bit.png 
+Cityscapes/images/train/bremen/bremen_000055_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000212_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000059_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000303_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000312_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000043_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000174_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000086_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000238_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000075_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000246_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000134_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000066_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000257_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000049_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000185_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000069_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000282_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000229_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000211_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000194_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000309_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000283_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000053_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000218_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000126_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000138_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000179_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000188_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000015_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000198_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000011_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000058_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000019_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000135_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000040_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000182_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000305_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000300_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000233_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000220_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000046_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000314_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000124_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000190_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000081_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000022_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000023_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000128_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000100_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000035_000019_leftImg8bit.png 
+Cityscapes/images/train/bremen/bremen_000273_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000020_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000063_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000248_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000137_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000041_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000250_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000209_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000163_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000304_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000160_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000038_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000210_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000308_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000306_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000025_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000141_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000291_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000061_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000142_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000073_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000054_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000042_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000264_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000070_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000006_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000310_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000187_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000201_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000013_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000106_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000162_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000297_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000072_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000269_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000216_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000199_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000177_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000095_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000098_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000281_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000184_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000254_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000109_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000068_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000028_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000296_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000050_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000077_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000258_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000200_000019_leftImg8bit.png 
+Cityscapes/images/train/bremen/bremen_000196_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000080_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000062_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000079_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000241_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000119_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000268_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000132_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000003_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000045_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000133_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000172_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000266_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000231_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000108_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000034_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000146_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000276_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000008_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000261_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000249_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000251_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000219_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000101_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000139_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000145_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000052_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000157_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000118_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000031_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000007_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000256_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000102_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000051_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000074_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000197_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000089_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000173_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000217_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000180_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000302_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000001_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000143_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000227_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000027_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000262_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000004_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000010_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000116_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000307_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000121_000019_leftImg8bit.png 
+Cityscapes/images/train/bremen/bremen_000271_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000021_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000149_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000012_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000071_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000092_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000017_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000263_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000060_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000178_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000130_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000082_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000301_000019_leftImg8bit.png +Cityscapes/images/train/bremen/bremen_000153_000019_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_036606_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_037039_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_025833_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_015645_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_014803_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_006026_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_023648_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_029203_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_028297_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_022210_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_022414_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_015038_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_020673_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_034936_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_024343_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_023040_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_000885_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_024855_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_015321_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_024196_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_002293_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_017216_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_006484_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_038150_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_006746_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_007651_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_014658_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_000600_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_016758_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_020776_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_004748_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_037223_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_015880_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_010562_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_025746_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_021606_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_016260_leftImg8bit.png 
+Cityscapes/images/train/bochum/bochum_000000_035958_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_009554_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_003674_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_003245_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_019188_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_008162_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_005936_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_001828_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_011711_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_016591_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_017453_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_016125_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_018195_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_007150_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_020899_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_011255_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_007950_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_014332_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_009951_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_002562_leftImg8bit.png +Cityscapes/images/train/bochum/bochum_000000_023174_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_073314_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_084746_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_047108_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_067338_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_098862_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_008221_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_041667_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_046566_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_038511_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_057678_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_059720_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_007737_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_068693_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_086499_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_093572_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_000042_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_042885_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_025986_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_077927_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_048750_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_025802_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_028608_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_046510_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_082301_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_103856_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_080169_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_047157_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_038915_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_016928_leftImg8bit.png 
+Cityscapes/images/train/hamburg/hamburg_000000_049558_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_053886_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_065983_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_069096_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_104857_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_073549_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_015350_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_065604_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_073672_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_042505_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_104428_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_052122_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_063698_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_090398_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_030279_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_099902_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_067799_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_085321_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_102574_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_002095_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_097447_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_064269_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_067587_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_097086_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_021961_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_080674_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_089491_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_068916_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_080438_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_039420_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_074425_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_044747_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_098616_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_036427_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_032266_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_070334_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_032460_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_040021_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_063403_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_043944_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_031971_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_045437_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_073999_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_092850_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_006322_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_035568_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_045908_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_088054_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_074267_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_085073_leftImg8bit.png 
+Cityscapes/images/train/hamburg/hamburg_000000_103075_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_085645_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_066706_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_026675_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_028439_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_062371_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_020211_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_048138_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_019373_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_032906_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_019760_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_003488_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_059339_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_079657_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_083696_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_105464_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_096063_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_053776_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_039264_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_037279_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_088783_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_048494_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_061790_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_073389_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_018592_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_054850_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_064825_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_078579_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_086636_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_046619_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_070444_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_054029_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_077756_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_087216_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_047057_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_078407_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_091900_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_014030_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_037036_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_029676_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_088983_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_073758_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_093325_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_033506_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_016691_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_006192_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_053086_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_001613_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_065843_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_100300_leftImg8bit.png 
+Cityscapes/images/train/hamburg/hamburg_000000_079376_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_027304_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_067223_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_060907_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_046872_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_077144_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_089696_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_091155_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_002338_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_098061_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_074545_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_101724_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_057816_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_056229_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_098400_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_003904_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_054555_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_060215_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_029144_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_081299_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_044400_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_038446_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_103367_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_036003_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_076392_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_058591_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_046078_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_036527_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_099109_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_102379_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_105123_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_029378_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_095561_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_020563_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_069417_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_077642_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_062964_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_076966_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_038729_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_088197_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_048960_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_066424_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_037741_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_060586_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_094717_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_062039_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_105296_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_019892_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_044996_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_071675_leftImg8bit.png 
+Cityscapes/images/train/hamburg/hamburg_000000_080878_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_055039_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_023472_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_008494_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_005639_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_047390_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_069177_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_069289_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_044251_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_084865_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_050160_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_090742_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_103541_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_087822_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_094185_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_018878_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_055414_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_065055_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_055894_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_092476_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_106102_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_051855_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_001106_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_088939_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_071942_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_053486_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_074694_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_085982_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_071150_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_037161_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_071016_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_062710_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_085413_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_088627_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_096624_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_066988_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_034049_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_045704_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_014940_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_099368_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_004985_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_024251_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_061468_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_091038_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_047220_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_027857_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_052904_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_061048_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_032719_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_083586_leftImg8bit.png 
+Cityscapes/images/train/hamburg/hamburg_000000_105724_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_053563_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_028056_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_054220_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_016447_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_082187_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_022524_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_039546_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_030953_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_103186_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_057487_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_013577_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_078842_leftImg8bit.png +Cityscapes/images/train/hamburg/hamburg_000000_056508_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000118_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000021_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000112_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000081_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000059_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000119_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000001_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000077_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000137_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000044_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000071_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000046_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000016_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000006_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000117_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000026_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000047_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000105_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000037_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000075_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000108_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000027_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000029_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000084_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000138_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000030_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000082_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000022_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000056_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000019_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000106_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000034_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000132_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000079_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000131_000019_leftImg8bit.png 
+Cityscapes/images/train/tubingen/tubingen_000009_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000004_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000126_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000100_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000057_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000113_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000124_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000050_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000032_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000058_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000018_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000123_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000063_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000042_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000068_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000072_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000115_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000062_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000055_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000049_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000060_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000064_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000076_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000085_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000120_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000007_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000033_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000087_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000028_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000111_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000036_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000023_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000089_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000065_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000109_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000002_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000054_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000128_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000038_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000107_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000086_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000020_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000010_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000048_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000122_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000066_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000080_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000008_000019_leftImg8bit.png 
+Cityscapes/images/train/tubingen/tubingen_000051_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000078_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000043_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000011_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000003_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000074_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000012_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000090_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000140_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000024_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000083_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000143_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000025_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000052_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000114_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000015_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000116_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000139_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000035_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000069_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000017_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000070_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000073_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000125_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000014_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000005_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000031_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000053_000019_leftImg8bit.png +Cityscapes/images/train/tubingen/tubingen_000134_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000025_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000070_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000048_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000097_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000091_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000006_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000142_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000020_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000129_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000150_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000003_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000110_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000040_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000082_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000023_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000059_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000135_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000138_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000131_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000029_000019_leftImg8bit.png 
+Cityscapes/images/train/aachen/aachen_000137_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000047_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000031_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000076_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000127_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000159_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000073_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000102_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000153_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000086_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000085_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000058_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000123_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000143_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000017_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000141_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000101_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000081_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000125_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000105_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000038_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000032_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000054_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000009_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000088_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000139_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000090_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000111_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000136_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000077_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000072_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000005_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000034_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000166_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000164_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000039_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000067_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000056_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000026_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000162_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000021_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000148_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000098_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000122_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000075_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000161_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000000_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000093_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000099_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000071_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000087_000019_leftImg8bit.png 
+Cityscapes/images/train/aachen/aachen_000066_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000053_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000103_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000116_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000106_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000172_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000080_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000033_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000065_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000035_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000024_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000046_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000114_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000019_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000092_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000119_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000155_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000063_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000094_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000030_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000084_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000016_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000112_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000132_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000154_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000089_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000069_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000062_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000168_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000010_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000133_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000173_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000169_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000042_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000120_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000134_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000145_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000022_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000045_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000109_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000049_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000074_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000041_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000068_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000027_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000050_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000008_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000057_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000147_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000055_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000051_000019_leftImg8bit.png 
+Cityscapes/images/train/aachen/aachen_000018_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000115_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000171_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000140_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000126_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000100_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000052_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000118_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000156_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000013_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000121_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000012_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000002_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000146_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000144_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000036_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000108_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000083_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000015_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000004_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000107_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000095_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000064_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000163_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000014_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000113_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000128_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000011_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000117_000019_leftImg8bit.png +Cityscapes/images/train/aachen/aachen_000160_000019_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_025434_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_021553_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_029704_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_012353_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_030400_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_015116_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_005252_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_000108_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_023338_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_030701_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_026580_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_024921_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_028378_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_032390_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_026919_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_003096_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_014146_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_029050_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_017342_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_034231_leftImg8bit.png 
+Cityscapes/images/train/krefeld/krefeld_000000_020033_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_023510_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_030560_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_011655_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_027954_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_001908_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_002083_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_017042_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_015868_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_013139_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_019125_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_018514_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_022162_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_008305_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_036299_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_019697_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_016342_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_034389_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_034156_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_016863_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_020334_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_010653_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_030221_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_018747_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_021000_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_020873_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_012505_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_020933_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_024604_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_018004_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_021814_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_000316_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_009926_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_027596_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_020624_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_013766_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_025812_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_031257_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_035398_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_008584_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_034686_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_004447_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_024276_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_014673_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_030111_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_005503_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_007325_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_021222_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_015494_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_026269_leftImg8bit.png 
+Cityscapes/images/train/krefeld/krefeld_000000_009574_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_014886_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_017489_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_024362_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_003937_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_018866_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_027075_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_033478_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_019791_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_010160_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_023143_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_023698_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_004608_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_009404_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_011483_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_028638_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_010329_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_032614_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_000926_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_015687_leftImg8bit.png +Cityscapes/images/train/krefeld/krefeld_000000_006274_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_015849_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_008200_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_037039_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_045004_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_029455_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_044085_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_014537_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_003224_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_046954_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_002140_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_044195_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_020655_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_005970_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_058189_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_053027_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_026356_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_041610_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_040793_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_048379_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_034347_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_054965_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_046200_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_048274_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_055124_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_015587_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_024136_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_052512_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_038773_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_030781_leftImg8bit.png 
+Cityscapes/images/train/hanover/hanover_000000_034720_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_005599_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_029325_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_020089_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_007897_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_057532_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_051842_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_012675_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_019456_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_023881_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_027007_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_023975_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_047629_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_019282_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_052729_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_043550_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_043653_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_026804_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_001173_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_046572_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_027561_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_003853_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_052649_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_010553_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_017041_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_013814_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_029769_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_013094_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_036051_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_055800_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_012347_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_056361_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_021337_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_042581_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_032681_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_040221_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_056142_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_035768_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_010403_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_032351_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_040294_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_043102_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_027481_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_038855_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_005175_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_039021_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_056601_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_014319_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_043236_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_011170_leftImg8bit.png 
+Cityscapes/images/train/hanover/hanover_000000_022645_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_038927_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_051059_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_043822_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_031856_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_007342_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_014713_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_057710_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_011971_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_034935_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_009128_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_047499_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_006355_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_045188_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_052887_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_039470_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_016038_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_056457_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_049005_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_053437_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_008017_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_023614_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_029404_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_013205_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_044622_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_051271_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_032210_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_034141_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_037516_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_042992_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_016558_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_005288_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_004752_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_040051_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_014919_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_051536_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_026743_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_034015_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_028460_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_027650_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_025437_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_026183_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_027282_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_027766_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_033457_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_018546_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_001620_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_049269_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_024276_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_023239_leftImg8bit.png 
+Cityscapes/images/train/hanover/hanover_000000_029043_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_053604_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_024441_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_040133_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_023276_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_041232_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_052013_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_036562_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_031144_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_009420_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_042770_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_026014_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_042255_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_007780_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_006922_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_004230_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_054276_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_050228_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_034560_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_042382_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_011471_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_046398_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_030889_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_004646_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_019116_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_027998_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_051152_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_046646_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_055937_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_027390_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_035491_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_056800_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_040456_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_046732_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_019672_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_024989_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_025335_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_050398_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_030546_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_018800_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_044344_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_030346_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_045841_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_024719_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_002357_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_045657_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_037298_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_002458_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_030276_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_048508_leftImg8bit.png 
+Cityscapes/images/train/hanover/hanover_000000_028202_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_045446_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_048765_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_035606_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_047870_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_049465_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_009004_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_055592_leftImg8bit.png +Cityscapes/images/train/hanover/hanover_000000_005732_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000018_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000077_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000114_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000036_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000041_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000133_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000089_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000093_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000027_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000058_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000083_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000116_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000053_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000082_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000105_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000084_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000091_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000038_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000043_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000029_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000057_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000101_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000121_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000064_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000039_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000000_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000113_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000034_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000020_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000075_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000109_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000046_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000138_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000025_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000085_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000037_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000092_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000078_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000099_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000107_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000032_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000117_000019_leftImg8bit.png 
+Cityscapes/images/train/weimar/weimar_000028_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000074_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000130_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000096_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000090_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000106_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000111_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000052_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000126_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000065_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000056_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000031_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000019_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000035_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000033_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000127_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000006_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000061_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000071_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000049_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000086_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000055_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000134_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000014_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000135_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000072_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000087_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000003_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000042_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000094_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000129_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000030_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000131_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000128_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000081_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000076_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000007_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000070_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000104_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000066_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000088_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000017_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000073_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000015_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000108_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000102_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000016_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000047_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000098_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000040_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000100_000019_leftImg8bit.png 
+Cityscapes/images/train/weimar/weimar_000050_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000063_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000139_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000136_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000140_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000115_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000112_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000001_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000010_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000103_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000045_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000080_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000095_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000024_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000002_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000026_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000097_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000013_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000132_000019_leftImg8bit.png +Cityscapes/images/train/weimar/weimar_000054_000019_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_015602_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_055934_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_014503_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_054275_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_004660_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_007524_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_045135_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_008771_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_014416_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_023515_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_037090_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_029178_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_019247_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_007813_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_031582_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_020956_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_049399_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_049977_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_059433_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_019891_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_028379_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_008310_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_056330_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_029696_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_049143_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_035255_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_058105_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_025268_leftImg8bit.png 
+Cityscapes/images/train/strasbourg/strasbourg_000000_006995_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_029179_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_020432_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_002949_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_013266_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_034375_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_009246_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_033425_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_043886_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_001722_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_000508_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_030122_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_020653_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_012070_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_002081_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_016481_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_045880_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_002216_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_017540_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_026606_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_042869_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_009333_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_031683_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_033062_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_052497_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_021951_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_005995_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_027156_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_029281_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_024152_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_051661_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_021651_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_016247_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_061472_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_035008_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_035689_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_051877_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_014743_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_036232_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_030120_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_060173_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_013944_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_025907_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_003489_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_017159_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_000113_leftImg8bit.png 
+Cityscapes/images/train/strasbourg/strasbourg_000001_042434_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_042309_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_053976_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_040761_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_026575_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_039231_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_003632_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_025089_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_033838_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_014066_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_022489_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_051934_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_037645_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_031427_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_052430_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_009471_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_017283_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_030269_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_039114_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_016681_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_057517_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_009795_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_008677_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_003159_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_022067_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_006386_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_034387_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_060821_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_036937_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_025772_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_030839_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_028852_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_015506_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_006483_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_017761_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_024945_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_005289_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_065214_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_047619_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_029577_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_035942_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_002354_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_009619_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_028912_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_010755_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_010816_leftImg8bit.png 
+Cityscapes/images/train/strasbourg/strasbourg_000001_055860_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_031223_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_014629_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_009618_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_055273_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_052979_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_011990_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_030706_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_029400_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_015131_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_062691_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_014101_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_061685_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_042558_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_039558_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_011880_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_014584_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_001449_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_004745_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_022151_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_057811_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_057930_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_039703_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_025833_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_036697_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_028628_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_047955_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_032346_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_010640_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_024701_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_025426_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_052050_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_029839_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_032315_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_030017_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_026741_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_064393_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_016376_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_046324_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_024379_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_040620_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_019355_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_025491_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_014258_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_033747_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_034923_leftImg8bit.png 
+Cityscapes/images/train/strasbourg/strasbourg_000001_030539_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_017675_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_003676_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_015974_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_037776_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_035276_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_013654_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_011775_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_029051_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_058373_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_034494_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_026106_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_014033_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_016024_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_034040_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_030941_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_027233_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_040981_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_017469_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_002519_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_013322_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_051574_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_007441_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_019698_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_014931_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_052198_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_017844_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_019229_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_047755_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_019617_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_040564_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_030725_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_027097_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_011225_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_056142_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_060061_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_031976_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_052840_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_029339_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_054639_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_061285_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_013863_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_051317_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_003991_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_062362_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_010372_leftImg8bit.png 
+Cityscapes/images/train/strasbourg/strasbourg_000000_017593_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_007727_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_033129_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_053579_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_061384_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_004951_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_008603_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_051134_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_006264_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_033448_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_056857_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_020904_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_038281_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_016253_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_004983_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_002644_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_029481_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_015764_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_023064_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_006621_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_005219_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_006153_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_023271_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_028556_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_030997_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_026882_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_014235_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_047702_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_018153_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_004248_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_013574_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_026355_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_063808_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_005249_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_013223_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_029729_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_049776_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_064224_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_004106_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_048605_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_017044_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_026998_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_008784_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_010162_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_030324_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_031067_leftImg8bit.png 
+Cityscapes/images/train/strasbourg/strasbourg_000001_052544_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_052297_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_057191_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_017081_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_017450_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_039446_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_029915_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_051448_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_004383_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_034633_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_026316_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_000710_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_041215_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_035713_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_010445_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_036480_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_048121_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_030435_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_016436_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_039374_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_063385_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_029020_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_058954_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_012956_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_026611_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_055698_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_001072_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_025351_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_006106_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_005666_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_033027_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_031272_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_011617_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_018155_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_027771_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_005876_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_004112_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_035562_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_004260_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_031602_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_008576_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_023694_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_028822_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_032660_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_018432_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_053222_leftImg8bit.png 
+Cityscapes/images/train/strasbourg/strasbourg_000001_050098_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_016311_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_009097_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_010049_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_059914_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_013767_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_031116_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_028240_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_013914_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_003846_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_000778_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_032962_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_034097_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_047336_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_062542_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_022363_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_029980_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000000_018358_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_001901_leftImg8bit.png +Cityscapes/images/train/strasbourg/strasbourg_000001_042235_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000004_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000029_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000119_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000021_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000081_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000077_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000085_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000040_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000006_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000046_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000140_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000010_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000032_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000078_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000036_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000037_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000045_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000023_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000005_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000087_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000109_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000103_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000069_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000147_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000148_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000128_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000122_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000132_000019_leftImg8bit.png 
+Cityscapes/images/train/cologne/cologne_000080_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000144_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000143_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000101_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000152_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000089_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000093_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000071_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000126_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000053_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000033_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000051_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000055_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000044_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000075_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000083_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000130_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000129_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000066_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000135_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000092_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000073_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000019_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000039_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000104_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000138_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000116_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000043_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000031_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000088_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000011_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000094_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000113_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000015_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000098_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000014_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000145_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000076_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000142_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000007_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000125_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000079_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000054_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000102_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000123_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000091_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000124_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000017_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000118_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000057_000019_leftImg8bit.png 
+Cityscapes/images/train/cologne/cologne_000137_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000022_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000026_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000082_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000012_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000000_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000139_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000072_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000041_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000003_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000008_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000063_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000061_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000064_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000117_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000020_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000131_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000025_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000100_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000030_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000009_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000146_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000084_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000105_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000065_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000047_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000106_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000028_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000108_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000068_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000074_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000067_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000121_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000114_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000134_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000150_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000127_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000059_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000034_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000107_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000095_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000096_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000048_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000052_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000070_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000120_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000001_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000042_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000049_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000141_000019_leftImg8bit.png 
+Cityscapes/images/train/cologne/cologne_000099_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000111_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000058_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000018_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000056_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000090_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000038_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000002_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000136_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000086_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000027_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000133_000019_leftImg8bit.png +Cityscapes/images/train/cologne/cologne_000035_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000063_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000051_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000067_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000100_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000068_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000052_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000000_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000021_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000091_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000017_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000087_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000098_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000059_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000070_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000071_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000054_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000034_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000086_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000083_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000060_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000106_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000102_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000104_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000072_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000016_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000105_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000074_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000108_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000064_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000007_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000012_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000011_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000097_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000065_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000061_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000053_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000103_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000036_000019_leftImg8bit.png 
+Cityscapes/images/train/erfurt/erfurt_000062_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000048_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000090_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000082_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000022_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000015_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000050_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000084_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000069_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000005_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000055_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000006_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000101_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000004_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000037_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000032_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000024_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000075_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000080_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000040_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000002_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000042_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000049_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000046_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000023_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000014_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000047_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000029_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000025_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000008_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000057_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000066_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000073_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000020_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000001_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000041_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000076_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000043_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000018_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000085_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000026_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000003_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000035_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000031_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000039_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000038_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000056_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000033_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000028_000019_leftImg8bit.png +Cityscapes/images/train/erfurt/erfurt_000093_000019_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_011383_leftImg8bit.png 
+Cityscapes/images/train/monchengladbach/monchengladbach_000000_033683_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_001068_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_010505_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_025215_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_006169_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_020856_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_010733_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_002255_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_004580_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_020596_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_035650_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_020303_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_002478_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_035083_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_021663_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_019500_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_010280_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_009615_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_005876_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_030662_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_030010_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_009930_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_026006_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000001_002229_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_007098_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_035718_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000001_002353_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_006518_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_001294_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_026908_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_010860_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_012672_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000001_000537_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_027628_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_018445_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_003442_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_032540_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_028883_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000001_000054_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_031005_leftImg8bit.png 
+Cityscapes/images/train/monchengladbach/monchengladbach_000000_005138_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_023375_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_009690_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_022748_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_023856_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_021104_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_026305_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_024964_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_013352_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000001_001936_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_018294_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_018114_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_017950_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_007851_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000001_000168_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_031360_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_018575_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_007695_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_034930_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_014685_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_018720_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_033454_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_002972_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_034621_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_031623_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_015685_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_009191_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_000383_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_015928_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000001_000876_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_005686_leftImg8bit.png +Cityscapes/images/train/monchengladbach/monchengladbach_000000_013228_leftImg8bit.png diff --git a/datasets/data_path/eth.train b/datasets/data_path/eth.train new file mode 100644 index 0000000000000000000000000000000000000000..1a322180a189489398b103c9f703954456defd00 --- /dev/null +++ b/datasets/data_path/eth.train @@ -0,0 +1,2056 @@ +ETHZ/eth01/images/image_00000001_0.png +ETHZ/eth01/images/image_00000002_0.png +ETHZ/eth01/images/image_00000003_0.png +ETHZ/eth01/images/image_00000004_0.png +ETHZ/eth01/images/image_00000005_0.png +ETHZ/eth01/images/image_00000006_0.png +ETHZ/eth01/images/image_00000007_0.png +ETHZ/eth01/images/image_00000008_0.png +ETHZ/eth01/images/image_00000009_0.png +ETHZ/eth01/images/image_00000010_0.png +ETHZ/eth01/images/image_00000011_0.png +ETHZ/eth01/images/image_00000012_0.png +ETHZ/eth01/images/image_00000013_0.png 
+ETHZ/eth01/images/image_00000014_0.png +ETHZ/eth01/images/image_00000015_0.png +ETHZ/eth01/images/image_00000016_0.png +ETHZ/eth01/images/image_00000017_0.png +ETHZ/eth01/images/image_00000018_0.png +ETHZ/eth01/images/image_00000019_0.png +ETHZ/eth01/images/image_00000020_0.png +ETHZ/eth01/images/image_00000021_0.png +ETHZ/eth01/images/image_00000022_0.png +ETHZ/eth01/images/image_00000023_0.png +ETHZ/eth01/images/image_00000024_0.png +ETHZ/eth01/images/image_00000025_0.png +ETHZ/eth01/images/image_00000026_0.png +ETHZ/eth01/images/image_00000027_0.png +ETHZ/eth01/images/image_00000028_0.png +ETHZ/eth01/images/image_00000029_0.png +ETHZ/eth01/images/image_00000030_0.png +ETHZ/eth01/images/image_00000031_0.png +ETHZ/eth01/images/image_00000032_0.png +ETHZ/eth01/images/image_00000033_0.png +ETHZ/eth01/images/image_00000034_0.png +ETHZ/eth01/images/image_00000035_0.png +ETHZ/eth01/images/image_00000036_0.png +ETHZ/eth01/images/image_00000037_0.png +ETHZ/eth01/images/image_00000038_0.png +ETHZ/eth01/images/image_00000039_0.png +ETHZ/eth01/images/image_00000040_0.png +ETHZ/eth01/images/image_00000041_0.png +ETHZ/eth01/images/image_00000042_0.png +ETHZ/eth01/images/image_00000043_0.png +ETHZ/eth01/images/image_00000044_0.png +ETHZ/eth01/images/image_00000045_0.png +ETHZ/eth01/images/image_00000046_0.png +ETHZ/eth01/images/image_00000047_0.png +ETHZ/eth01/images/image_00000048_0.png +ETHZ/eth01/images/image_00000049_0.png +ETHZ/eth01/images/image_00000050_0.png +ETHZ/eth01/images/image_00000051_0.png +ETHZ/eth01/images/image_00000052_0.png +ETHZ/eth01/images/image_00000053_0.png +ETHZ/eth01/images/image_00000054_0.png +ETHZ/eth01/images/image_00000055_0.png +ETHZ/eth01/images/image_00000056_0.png +ETHZ/eth01/images/image_00000057_0.png +ETHZ/eth01/images/image_00000058_0.png +ETHZ/eth01/images/image_00000059_0.png +ETHZ/eth01/images/image_00000060_0.png +ETHZ/eth01/images/image_00000061_0.png +ETHZ/eth01/images/image_00000062_0.png +ETHZ/eth01/images/image_00000063_0.png +ETHZ/eth01/images/image_00000064_0.png +ETHZ/eth01/images/image_00000065_0.png +ETHZ/eth01/images/image_00000066_0.png +ETHZ/eth01/images/image_00000067_0.png +ETHZ/eth01/images/image_00000068_0.png +ETHZ/eth01/images/image_00000069_0.png +ETHZ/eth01/images/image_00000070_0.png +ETHZ/eth01/images/image_00000071_0.png +ETHZ/eth01/images/image_00000072_0.png +ETHZ/eth01/images/image_00000073_0.png +ETHZ/eth01/images/image_00000074_0.png +ETHZ/eth01/images/image_00000075_0.png +ETHZ/eth01/images/image_00000076_0.png +ETHZ/eth01/images/image_00000077_0.png +ETHZ/eth01/images/image_00000078_0.png +ETHZ/eth01/images/image_00000079_0.png +ETHZ/eth01/images/image_00000080_0.png +ETHZ/eth01/images/image_00000081_0.png +ETHZ/eth01/images/image_00000082_0.png +ETHZ/eth01/images/image_00000083_0.png +ETHZ/eth01/images/image_00000084_0.png +ETHZ/eth01/images/image_00000085_0.png +ETHZ/eth01/images/image_00000086_0.png +ETHZ/eth01/images/image_00000087_0.png +ETHZ/eth01/images/image_00000088_0.png +ETHZ/eth01/images/image_00000089_0.png +ETHZ/eth01/images/image_00000090_0.png +ETHZ/eth01/images/image_00000091_0.png +ETHZ/eth01/images/image_00000092_0.png +ETHZ/eth01/images/image_00000093_0.png +ETHZ/eth01/images/image_00000094_0.png +ETHZ/eth01/images/image_00000095_0.png +ETHZ/eth01/images/image_00000096_0.png +ETHZ/eth01/images/image_00000097_0.png +ETHZ/eth01/images/image_00000098_0.png +ETHZ/eth01/images/image_00000099_0.png +ETHZ/eth01/images/image_00000100_0.png +ETHZ/eth01/images/image_00000101_0.png 
+ETHZ/eth01/images/image_00000102_0.png +ETHZ/eth01/images/image_00000103_0.png +ETHZ/eth01/images/image_00000104_0.png +ETHZ/eth01/images/image_00000105_0.png +ETHZ/eth01/images/image_00000106_0.png +ETHZ/eth01/images/image_00000107_0.png +ETHZ/eth01/images/image_00000108_0.png +ETHZ/eth01/images/image_00000109_0.png +ETHZ/eth01/images/image_00000110_0.png +ETHZ/eth01/images/image_00000111_0.png +ETHZ/eth01/images/image_00000112_0.png +ETHZ/eth01/images/image_00000113_0.png +ETHZ/eth01/images/image_00000114_0.png +ETHZ/eth01/images/image_00000115_0.png +ETHZ/eth01/images/image_00000116_0.png +ETHZ/eth01/images/image_00000117_0.png +ETHZ/eth01/images/image_00000118_0.png +ETHZ/eth01/images/image_00000119_0.png +ETHZ/eth01/images/image_00000120_0.png +ETHZ/eth01/images/image_00000121_0.png +ETHZ/eth01/images/image_00000122_0.png +ETHZ/eth01/images/image_00000123_0.png +ETHZ/eth01/images/image_00000124_0.png +ETHZ/eth01/images/image_00000125_0.png +ETHZ/eth01/images/image_00000126_0.png +ETHZ/eth01/images/image_00000127_0.png +ETHZ/eth01/images/image_00000128_0.png +ETHZ/eth01/images/image_00000129_0.png +ETHZ/eth01/images/image_00000130_0.png +ETHZ/eth01/images/image_00000131_0.png +ETHZ/eth01/images/image_00000132_0.png +ETHZ/eth01/images/image_00000133_0.png +ETHZ/eth01/images/image_00000134_0.png +ETHZ/eth01/images/image_00000135_0.png +ETHZ/eth01/images/image_00000136_0.png +ETHZ/eth01/images/image_00000137_0.png +ETHZ/eth01/images/image_00000138_0.png +ETHZ/eth01/images/image_00000139_0.png +ETHZ/eth01/images/image_00000140_0.png +ETHZ/eth01/images/image_00000141_0.png +ETHZ/eth01/images/image_00000142_0.png +ETHZ/eth01/images/image_00000143_0.png +ETHZ/eth01/images/image_00000144_0.png +ETHZ/eth01/images/image_00000145_0.png +ETHZ/eth01/images/image_00000146_0.png +ETHZ/eth01/images/image_00000147_0.png +ETHZ/eth01/images/image_00000148_0.png +ETHZ/eth01/images/image_00000149_0.png +ETHZ/eth01/images/image_00000150_0.png +ETHZ/eth01/images/image_00000151_0.png +ETHZ/eth01/images/image_00000152_0.png +ETHZ/eth01/images/image_00000153_0.png +ETHZ/eth01/images/image_00000154_0.png +ETHZ/eth01/images/image_00000155_0.png +ETHZ/eth01/images/image_00000156_0.png +ETHZ/eth01/images/image_00000157_0.png +ETHZ/eth01/images/image_00000158_0.png +ETHZ/eth01/images/image_00000159_0.png +ETHZ/eth01/images/image_00000160_0.png +ETHZ/eth01/images/image_00000161_0.png +ETHZ/eth01/images/image_00000162_0.png +ETHZ/eth01/images/image_00000163_0.png +ETHZ/eth01/images/image_00000164_0.png +ETHZ/eth01/images/image_00000165_0.png +ETHZ/eth01/images/image_00000166_0.png +ETHZ/eth01/images/image_00000167_0.png +ETHZ/eth01/images/image_00000168_0.png +ETHZ/eth01/images/image_00000169_0.png +ETHZ/eth01/images/image_00000170_0.png +ETHZ/eth01/images/image_00000171_0.png +ETHZ/eth01/images/image_00000172_0.png +ETHZ/eth01/images/image_00000173_0.png +ETHZ/eth01/images/image_00000174_0.png +ETHZ/eth01/images/image_00000175_0.png +ETHZ/eth01/images/image_00000176_0.png +ETHZ/eth01/images/image_00000177_0.png +ETHZ/eth01/images/image_00000178_0.png +ETHZ/eth01/images/image_00000179_0.png +ETHZ/eth01/images/image_00000180_0.png +ETHZ/eth01/images/image_00000181_0.png +ETHZ/eth01/images/image_00000182_0.png +ETHZ/eth01/images/image_00000183_0.png +ETHZ/eth01/images/image_00000184_0.png +ETHZ/eth01/images/image_00000185_0.png +ETHZ/eth01/images/image_00000186_0.png +ETHZ/eth01/images/image_00000187_0.png +ETHZ/eth01/images/image_00000188_0.png +ETHZ/eth01/images/image_00000189_0.png 
+ETHZ/eth01/images/image_00000190_0.png +ETHZ/eth01/images/image_00000191_0.png +ETHZ/eth01/images/image_00000192_0.png +ETHZ/eth01/images/image_00000193_0.png +ETHZ/eth01/images/image_00000194_0.png +ETHZ/eth01/images/image_00000195_0.png +ETHZ/eth01/images/image_00000196_0.png +ETHZ/eth01/images/image_00000197_0.png +ETHZ/eth01/images/image_00000198_0.png +ETHZ/eth01/images/image_00000199_0.png +ETHZ/eth01/images/image_00000200_0.png +ETHZ/eth01/images/image_00000201_0.png +ETHZ/eth01/images/image_00000202_0.png +ETHZ/eth01/images/image_00000203_0.png +ETHZ/eth01/images/image_00000204_0.png +ETHZ/eth01/images/image_00000205_0.png +ETHZ/eth01/images/image_00000206_0.png +ETHZ/eth01/images/image_00000207_0.png +ETHZ/eth01/images/image_00000208_0.png +ETHZ/eth01/images/image_00000209_0.png +ETHZ/eth01/images/image_00000210_0.png +ETHZ/eth01/images/image_00000211_0.png +ETHZ/eth01/images/image_00000212_0.png +ETHZ/eth01/images/image_00000213_0.png +ETHZ/eth01/images/image_00000214_0.png +ETHZ/eth01/images/image_00000215_0.png +ETHZ/eth01/images/image_00000216_0.png +ETHZ/eth01/images/image_00000217_0.png +ETHZ/eth01/images/image_00000218_0.png +ETHZ/eth01/images/image_00000219_0.png +ETHZ/eth01/images/image_00000220_0.png +ETHZ/eth01/images/image_00000221_0.png +ETHZ/eth01/images/image_00000222_0.png +ETHZ/eth01/images/image_00000223_0.png +ETHZ/eth01/images/image_00000224_0.png +ETHZ/eth01/images/image_00000225_0.png +ETHZ/eth01/images/image_00000226_0.png +ETHZ/eth01/images/image_00000227_0.png +ETHZ/eth01/images/image_00000228_0.png +ETHZ/eth01/images/image_00000229_0.png +ETHZ/eth01/images/image_00000230_0.png +ETHZ/eth01/images/image_00000231_0.png +ETHZ/eth01/images/image_00000232_0.png +ETHZ/eth01/images/image_00000233_0.png +ETHZ/eth01/images/image_00000234_0.png +ETHZ/eth01/images/image_00000235_0.png +ETHZ/eth01/images/image_00000236_0.png +ETHZ/eth01/images/image_00000237_0.png +ETHZ/eth01/images/image_00000238_0.png +ETHZ/eth01/images/image_00000239_0.png +ETHZ/eth01/images/image_00000240_0.png +ETHZ/eth01/images/image_00000241_0.png +ETHZ/eth01/images/image_00000242_0.png +ETHZ/eth01/images/image_00000243_0.png +ETHZ/eth01/images/image_00000244_0.png +ETHZ/eth01/images/image_00000245_0.png +ETHZ/eth01/images/image_00000246_0.png +ETHZ/eth01/images/image_00000247_0.png +ETHZ/eth01/images/image_00000248_0.png +ETHZ/eth01/images/image_00000249_0.png +ETHZ/eth01/images/image_00000250_0.png +ETHZ/eth01/images/image_00000251_0.png +ETHZ/eth01/images/image_00000252_0.png +ETHZ/eth01/images/image_00000253_0.png +ETHZ/eth01/images/image_00000254_0.png +ETHZ/eth01/images/image_00000255_0.png +ETHZ/eth01/images/image_00000256_0.png +ETHZ/eth01/images/image_00000257_0.png +ETHZ/eth01/images/image_00000258_0.png +ETHZ/eth01/images/image_00000259_0.png +ETHZ/eth01/images/image_00000260_0.png +ETHZ/eth01/images/image_00000261_0.png +ETHZ/eth01/images/image_00000262_0.png +ETHZ/eth01/images/image_00000263_0.png +ETHZ/eth01/images/image_00000264_0.png +ETHZ/eth01/images/image_00000265_0.png +ETHZ/eth01/images/image_00000266_0.png +ETHZ/eth01/images/image_00000267_0.png +ETHZ/eth01/images/image_00000268_0.png +ETHZ/eth01/images/image_00000269_0.png +ETHZ/eth01/images/image_00000270_0.png +ETHZ/eth01/images/image_00000271_0.png +ETHZ/eth01/images/image_00000272_0.png +ETHZ/eth01/images/image_00000273_0.png +ETHZ/eth01/images/image_00000274_0.png +ETHZ/eth01/images/image_00000275_0.png +ETHZ/eth01/images/image_00000276_0.png +ETHZ/eth01/images/image_00000277_0.png 
+ETHZ/eth01/images/image_00000278_0.png +ETHZ/eth01/images/image_00000279_0.png +ETHZ/eth01/images/image_00000280_0.png +ETHZ/eth01/images/image_00000281_0.png +ETHZ/eth01/images/image_00000282_0.png +ETHZ/eth01/images/image_00000283_0.png +ETHZ/eth01/images/image_00000284_0.png +ETHZ/eth01/images/image_00000285_0.png +ETHZ/eth01/images/image_00000286_0.png +ETHZ/eth01/images/image_00000287_0.png +ETHZ/eth01/images/image_00000288_0.png +ETHZ/eth01/images/image_00000289_0.png +ETHZ/eth01/images/image_00000290_0.png +ETHZ/eth01/images/image_00000291_0.png +ETHZ/eth01/images/image_00000292_0.png +ETHZ/eth01/images/image_00000293_0.png +ETHZ/eth01/images/image_00000294_0.png +ETHZ/eth01/images/image_00000295_0.png +ETHZ/eth01/images/image_00000296_0.png +ETHZ/eth01/images/image_00000297_0.png +ETHZ/eth01/images/image_00000298_0.png +ETHZ/eth01/images/image_00000299_0.png +ETHZ/eth01/images/image_00000300_0.png +ETHZ/eth01/images/image_00000301_0.png +ETHZ/eth01/images/image_00000302_0.png +ETHZ/eth01/images/image_00000303_0.png +ETHZ/eth01/images/image_00000304_0.png +ETHZ/eth01/images/image_00000305_0.png +ETHZ/eth01/images/image_00000306_0.png +ETHZ/eth01/images/image_00000307_0.png +ETHZ/eth01/images/image_00000308_0.png +ETHZ/eth01/images/image_00000309_0.png +ETHZ/eth01/images/image_00000310_0.png +ETHZ/eth01/images/image_00000311_0.png +ETHZ/eth01/images/image_00000312_0.png +ETHZ/eth01/images/image_00000313_0.png +ETHZ/eth01/images/image_00000314_0.png +ETHZ/eth01/images/image_00000315_0.png +ETHZ/eth01/images/image_00000316_0.png +ETHZ/eth01/images/image_00000317_0.png +ETHZ/eth01/images/image_00000318_0.png +ETHZ/eth01/images/image_00000319_0.png +ETHZ/eth01/images/image_00000320_0.png +ETHZ/eth01/images/image_00000321_0.png +ETHZ/eth01/images/image_00000322_0.png +ETHZ/eth01/images/image_00000323_0.png +ETHZ/eth01/images/image_00000324_0.png +ETHZ/eth01/images/image_00000325_0.png +ETHZ/eth01/images/image_00000326_0.png +ETHZ/eth01/images/image_00000327_0.png +ETHZ/eth01/images/image_00000328_0.png +ETHZ/eth01/images/image_00000329_0.png +ETHZ/eth01/images/image_00000330_0.png +ETHZ/eth01/images/image_00000331_0.png +ETHZ/eth01/images/image_00000332_0.png +ETHZ/eth01/images/image_00000333_0.png +ETHZ/eth01/images/image_00000334_0.png +ETHZ/eth01/images/image_00000335_0.png +ETHZ/eth01/images/image_00000336_0.png +ETHZ/eth01/images/image_00000337_0.png +ETHZ/eth01/images/image_00000338_0.png +ETHZ/eth01/images/image_00000339_0.png +ETHZ/eth01/images/image_00000340_0.png +ETHZ/eth01/images/image_00000341_0.png +ETHZ/eth01/images/image_00000342_0.png +ETHZ/eth01/images/image_00000343_0.png +ETHZ/eth01/images/image_00000344_0.png +ETHZ/eth01/images/image_00000345_0.png +ETHZ/eth01/images/image_00000346_0.png +ETHZ/eth01/images/image_00000347_0.png +ETHZ/eth01/images/image_00000348_0.png +ETHZ/eth01/images/image_00000349_0.png +ETHZ/eth01/images/image_00000350_0.png +ETHZ/eth01/images/image_00000351_0.png +ETHZ/eth01/images/image_00000352_0.png +ETHZ/eth01/images/image_00000353_0.png +ETHZ/eth01/images/image_00000354_0.png +ETHZ/eth01/images/image_00000355_0.png +ETHZ/eth01/images/image_00000356_0.png +ETHZ/eth01/images/image_00000357_0.png +ETHZ/eth01/images/image_00000358_0.png +ETHZ/eth01/images/image_00000359_0.png +ETHZ/eth01/images/image_00000360_0.png +ETHZ/eth01/images/image_00000361_0.png +ETHZ/eth01/images/image_00000362_0.png +ETHZ/eth01/images/image_00000363_0.png +ETHZ/eth01/images/image_00000364_0.png +ETHZ/eth01/images/image_00000365_0.png 
+ETHZ/eth01/images/image_00000366_0.png +ETHZ/eth01/images/image_00000367_0.png +ETHZ/eth01/images/image_00000368_0.png +ETHZ/eth01/images/image_00000369_0.png +ETHZ/eth01/images/image_00000370_0.png +ETHZ/eth01/images/image_00000371_0.png +ETHZ/eth01/images/image_00000372_0.png +ETHZ/eth01/images/image_00000373_0.png +ETHZ/eth01/images/image_00000374_0.png +ETHZ/eth01/images/image_00000375_0.png +ETHZ/eth01/images/image_00000376_0.png +ETHZ/eth01/images/image_00000377_0.png +ETHZ/eth01/images/image_00000378_0.png +ETHZ/eth01/images/image_00000379_0.png +ETHZ/eth01/images/image_00000380_0.png +ETHZ/eth01/images/image_00000381_0.png +ETHZ/eth01/images/image_00000382_0.png +ETHZ/eth01/images/image_00000383_0.png +ETHZ/eth01/images/image_00000384_0.png +ETHZ/eth01/images/image_00000385_0.png +ETHZ/eth01/images/image_00000386_0.png +ETHZ/eth01/images/image_00000387_0.png +ETHZ/eth01/images/image_00000388_0.png +ETHZ/eth01/images/image_00000389_0.png +ETHZ/eth01/images/image_00000390_0.png +ETHZ/eth01/images/image_00000391_0.png +ETHZ/eth01/images/image_00000392_0.png +ETHZ/eth01/images/image_00000393_0.png +ETHZ/eth01/images/image_00000394_0.png +ETHZ/eth01/images/image_00000395_0.png +ETHZ/eth01/images/image_00000396_0.png +ETHZ/eth01/images/image_00000397_0.png +ETHZ/eth01/images/image_00000398_0.png +ETHZ/eth01/images/image_00000399_0.png +ETHZ/eth01/images/image_00000400_0.png +ETHZ/eth01/images/image_00000401_0.png +ETHZ/eth01/images/image_00000402_0.png +ETHZ/eth01/images/image_00000403_0.png +ETHZ/eth01/images/image_00000404_0.png +ETHZ/eth01/images/image_00000405_0.png +ETHZ/eth01/images/image_00000406_0.png +ETHZ/eth01/images/image_00000407_0.png +ETHZ/eth01/images/image_00000408_0.png +ETHZ/eth01/images/image_00000409_0.png +ETHZ/eth01/images/image_00000410_0.png +ETHZ/eth01/images/image_00000411_0.png +ETHZ/eth01/images/image_00000412_0.png +ETHZ/eth01/images/image_00000413_0.png +ETHZ/eth01/images/image_00000414_0.png +ETHZ/eth01/images/image_00000415_0.png +ETHZ/eth01/images/image_00000416_0.png +ETHZ/eth01/images/image_00000417_0.png +ETHZ/eth01/images/image_00000418_0.png +ETHZ/eth01/images/image_00000419_0.png +ETHZ/eth01/images/image_00000420_0.png +ETHZ/eth01/images/image_00000421_0.png +ETHZ/eth01/images/image_00000422_0.png +ETHZ/eth01/images/image_00000423_0.png +ETHZ/eth01/images/image_00000424_0.png +ETHZ/eth01/images/image_00000425_0.png +ETHZ/eth01/images/image_00000426_0.png +ETHZ/eth01/images/image_00000427_0.png +ETHZ/eth01/images/image_00000428_0.png +ETHZ/eth01/images/image_00000429_0.png +ETHZ/eth01/images/image_00000430_0.png +ETHZ/eth01/images/image_00000431_0.png +ETHZ/eth01/images/image_00000432_0.png +ETHZ/eth01/images/image_00000433_0.png +ETHZ/eth01/images/image_00000434_0.png +ETHZ/eth01/images/image_00000435_0.png +ETHZ/eth01/images/image_00000436_0.png +ETHZ/eth01/images/image_00000437_0.png +ETHZ/eth01/images/image_00000438_0.png +ETHZ/eth01/images/image_00000439_0.png +ETHZ/eth01/images/image_00000440_0.png +ETHZ/eth01/images/image_00000441_0.png +ETHZ/eth01/images/image_00000442_0.png +ETHZ/eth01/images/image_00000443_0.png +ETHZ/eth01/images/image_00000444_0.png +ETHZ/eth01/images/image_00000445_0.png +ETHZ/eth01/images/image_00000446_0.png +ETHZ/eth01/images/image_00000447_0.png +ETHZ/eth01/images/image_00000448_0.png +ETHZ/eth01/images/image_00000449_0.png +ETHZ/eth01/images/image_00000450_0.png +ETHZ/eth01/images/image_00000451_0.png +ETHZ/eth01/images/image_00000452_0.png +ETHZ/eth01/images/image_00000453_0.png 
+ETHZ/eth01/images/image_00000454_0.png +ETHZ/eth01/images/image_00000455_0.png +ETHZ/eth01/images/image_00000456_0.png +ETHZ/eth01/images/image_00000457_0.png +ETHZ/eth01/images/image_00000458_0.png +ETHZ/eth01/images/image_00000459_0.png +ETHZ/eth01/images/image_00000460_0.png +ETHZ/eth01/images/image_00000461_0.png +ETHZ/eth01/images/image_00000462_0.png +ETHZ/eth01/images/image_00000463_0.png +ETHZ/eth01/images/image_00000464_0.png +ETHZ/eth01/images/image_00000465_0.png +ETHZ/eth01/images/image_00000466_0.png +ETHZ/eth01/images/image_00000467_0.png +ETHZ/eth01/images/image_00000468_0.png +ETHZ/eth01/images/image_00000469_0.png +ETHZ/eth01/images/image_00000470_0.png +ETHZ/eth01/images/image_00000471_0.png +ETHZ/eth01/images/image_00000472_0.png +ETHZ/eth01/images/image_00000473_0.png +ETHZ/eth01/images/image_00000474_0.png +ETHZ/eth01/images/image_00000475_0.png +ETHZ/eth01/images/image_00000476_0.png +ETHZ/eth01/images/image_00000477_0.png +ETHZ/eth01/images/image_00000478_0.png +ETHZ/eth01/images/image_00000479_0.png +ETHZ/eth01/images/image_00000480_0.png +ETHZ/eth01/images/image_00000481_0.png +ETHZ/eth01/images/image_00000482_0.png +ETHZ/eth01/images/image_00000483_0.png +ETHZ/eth01/images/image_00000484_0.png +ETHZ/eth01/images/image_00000485_0.png +ETHZ/eth01/images/image_00000486_0.png +ETHZ/eth01/images/image_00000487_0.png +ETHZ/eth01/images/image_00000488_0.png +ETHZ/eth01/images/image_00000489_0.png +ETHZ/eth01/images/image_00000490_0.png +ETHZ/eth01/images/image_00000491_0.png +ETHZ/eth01/images/image_00000492_0.png +ETHZ/eth01/images/image_00000493_0.png +ETHZ/eth01/images/image_00000494_0.png +ETHZ/eth01/images/image_00000495_0.png +ETHZ/eth01/images/image_00000496_0.png +ETHZ/eth01/images/image_00000497_0.png +ETHZ/eth01/images/image_00000498_0.png +ETHZ/eth01/images/image_00000499_0.png +ETHZ/eth01/images/image_00000500_0.png +ETHZ/eth01/images/image_00000501_0.png +ETHZ/eth01/images/image_00000502_0.png +ETHZ/eth01/images/image_00000503_0.png +ETHZ/eth01/images/image_00000504_0.png +ETHZ/eth01/images/image_00000505_0.png +ETHZ/eth01/images/image_00000506_0.png +ETHZ/eth01/images/image_00000507_0.png +ETHZ/eth01/images/image_00000508_0.png +ETHZ/eth01/images/image_00000509_0.png +ETHZ/eth01/images/image_00000510_0.png +ETHZ/eth01/images/image_00000511_0.png +ETHZ/eth01/images/image_00000512_0.png +ETHZ/eth01/images/image_00000513_0.png +ETHZ/eth01/images/image_00000514_0.png +ETHZ/eth01/images/image_00000515_0.png +ETHZ/eth01/images/image_00000516_0.png +ETHZ/eth01/images/image_00000517_0.png +ETHZ/eth01/images/image_00000518_0.png +ETHZ/eth01/images/image_00000519_0.png +ETHZ/eth01/images/image_00000520_0.png +ETHZ/eth01/images/image_00000521_0.png +ETHZ/eth01/images/image_00000522_0.png +ETHZ/eth01/images/image_00000523_0.png +ETHZ/eth01/images/image_00000524_0.png +ETHZ/eth01/images/image_00000525_0.png +ETHZ/eth01/images/image_00000526_0.png +ETHZ/eth01/images/image_00000527_0.png +ETHZ/eth01/images/image_00000528_0.png +ETHZ/eth01/images/image_00000529_0.png +ETHZ/eth01/images/image_00000530_0.png +ETHZ/eth01/images/image_00000531_0.png +ETHZ/eth01/images/image_00000532_0.png +ETHZ/eth01/images/image_00000533_0.png +ETHZ/eth01/images/image_00000534_0.png +ETHZ/eth01/images/image_00000535_0.png +ETHZ/eth01/images/image_00000536_0.png +ETHZ/eth01/images/image_00000537_0.png +ETHZ/eth01/images/image_00000538_0.png +ETHZ/eth01/images/image_00000539_0.png +ETHZ/eth01/images/image_00000540_0.png +ETHZ/eth01/images/image_00000541_0.png 
+ETHZ/eth01/images/image_00000542_0.png +ETHZ/eth01/images/image_00000543_0.png +ETHZ/eth01/images/image_00000544_0.png +ETHZ/eth01/images/image_00000545_0.png +ETHZ/eth01/images/image_00000546_0.png +ETHZ/eth01/images/image_00000547_0.png +ETHZ/eth01/images/image_00000548_0.png +ETHZ/eth01/images/image_00000549_0.png +ETHZ/eth01/images/image_00000550_0.png +ETHZ/eth01/images/image_00000551_0.png +ETHZ/eth01/images/image_00000552_0.png +ETHZ/eth01/images/image_00000553_0.png +ETHZ/eth01/images/image_00000554_0.png +ETHZ/eth01/images/image_00000555_0.png +ETHZ/eth01/images/image_00000556_0.png +ETHZ/eth01/images/image_00000557_0.png +ETHZ/eth01/images/image_00000558_0.png +ETHZ/eth01/images/image_00000559_0.png +ETHZ/eth01/images/image_00000560_0.png +ETHZ/eth01/images/image_00000561_0.png +ETHZ/eth01/images/image_00000562_0.png +ETHZ/eth01/images/image_00000563_0.png +ETHZ/eth01/images/image_00000564_0.png +ETHZ/eth01/images/image_00000565_0.png +ETHZ/eth01/images/image_00000566_0.png +ETHZ/eth01/images/image_00000567_0.png +ETHZ/eth01/images/image_00000568_0.png +ETHZ/eth01/images/image_00000569_0.png +ETHZ/eth01/images/image_00000570_0.png +ETHZ/eth01/images/image_00000571_0.png +ETHZ/eth01/images/image_00000572_0.png +ETHZ/eth01/images/image_00000573_0.png +ETHZ/eth01/images/image_00000574_0.png +ETHZ/eth01/images/image_00000575_0.png +ETHZ/eth01/images/image_00000576_0.png +ETHZ/eth01/images/image_00000577_0.png +ETHZ/eth01/images/image_00000578_0.png +ETHZ/eth01/images/image_00000579_0.png +ETHZ/eth01/images/image_00000580_0.png +ETHZ/eth01/images/image_00000581_0.png +ETHZ/eth01/images/image_00000582_0.png +ETHZ/eth01/images/image_00000583_0.png +ETHZ/eth01/images/image_00000584_0.png +ETHZ/eth01/images/image_00000585_0.png +ETHZ/eth01/images/image_00000586_0.png +ETHZ/eth01/images/image_00000587_0.png +ETHZ/eth01/images/image_00000588_0.png +ETHZ/eth01/images/image_00000589_0.png +ETHZ/eth01/images/image_00000590_0.png +ETHZ/eth01/images/image_00000591_0.png +ETHZ/eth01/images/image_00000592_0.png +ETHZ/eth01/images/image_00000593_0.png +ETHZ/eth01/images/image_00000594_0.png +ETHZ/eth01/images/image_00000595_0.png +ETHZ/eth01/images/image_00000596_0.png +ETHZ/eth01/images/image_00000597_0.png +ETHZ/eth01/images/image_00000598_0.png +ETHZ/eth01/images/image_00000599_0.png +ETHZ/eth01/images/image_00000600_0.png +ETHZ/eth01/images/image_00000601_0.png +ETHZ/eth01/images/image_00000602_0.png +ETHZ/eth01/images/image_00000603_0.png +ETHZ/eth01/images/image_00000604_0.png +ETHZ/eth01/images/image_00000605_0.png +ETHZ/eth01/images/image_00000606_0.png +ETHZ/eth01/images/image_00000607_0.png +ETHZ/eth01/images/image_00000608_0.png +ETHZ/eth01/images/image_00000609_0.png +ETHZ/eth01/images/image_00000610_0.png +ETHZ/eth01/images/image_00000611_0.png +ETHZ/eth01/images/image_00000612_0.png +ETHZ/eth01/images/image_00000613_0.png +ETHZ/eth01/images/image_00000614_0.png +ETHZ/eth01/images/image_00000615_0.png +ETHZ/eth01/images/image_00000616_0.png +ETHZ/eth01/images/image_00000617_0.png +ETHZ/eth01/images/image_00000618_0.png +ETHZ/eth01/images/image_00000619_0.png +ETHZ/eth01/images/image_00000620_0.png +ETHZ/eth01/images/image_00000621_0.png +ETHZ/eth01/images/image_00000622_0.png +ETHZ/eth01/images/image_00000623_0.png +ETHZ/eth01/images/image_00000624_0.png +ETHZ/eth01/images/image_00000625_0.png +ETHZ/eth01/images/image_00000626_0.png +ETHZ/eth01/images/image_00000627_0.png +ETHZ/eth01/images/image_00000628_0.png +ETHZ/eth01/images/image_00000629_0.png 
+ETHZ/eth01/images/image_00000630_0.png +ETHZ/eth01/images/image_00000631_0.png +ETHZ/eth01/images/image_00000632_0.png +ETHZ/eth01/images/image_00000633_0.png +ETHZ/eth01/images/image_00000634_0.png +ETHZ/eth01/images/image_00000635_0.png +ETHZ/eth01/images/image_00000636_0.png +ETHZ/eth01/images/image_00000637_0.png +ETHZ/eth01/images/image_00000638_0.png +ETHZ/eth01/images/image_00000639_0.png +ETHZ/eth01/images/image_00000640_0.png +ETHZ/eth01/images/image_00000641_0.png +ETHZ/eth01/images/image_00000642_0.png +ETHZ/eth01/images/image_00000643_0.png +ETHZ/eth01/images/image_00000644_0.png +ETHZ/eth01/images/image_00000645_0.png +ETHZ/eth01/images/image_00000646_0.png +ETHZ/eth01/images/image_00000647_0.png +ETHZ/eth01/images/image_00000648_0.png +ETHZ/eth01/images/image_00000649_0.png +ETHZ/eth01/images/image_00000650_0.png +ETHZ/eth01/images/image_00000651_0.png +ETHZ/eth01/images/image_00000652_0.png +ETHZ/eth01/images/image_00000653_0.png +ETHZ/eth01/images/image_00000654_0.png +ETHZ/eth01/images/image_00000655_0.png +ETHZ/eth01/images/image_00000656_0.png +ETHZ/eth01/images/image_00000657_0.png +ETHZ/eth01/images/image_00000658_0.png +ETHZ/eth01/images/image_00000659_0.png +ETHZ/eth01/images/image_00000660_0.png +ETHZ/eth01/images/image_00000661_0.png +ETHZ/eth01/images/image_00000662_0.png +ETHZ/eth01/images/image_00000663_0.png +ETHZ/eth01/images/image_00000664_0.png +ETHZ/eth01/images/image_00000665_0.png +ETHZ/eth01/images/image_00000666_0.png +ETHZ/eth01/images/image_00000667_0.png +ETHZ/eth01/images/image_00000668_0.png +ETHZ/eth01/images/image_00000669_0.png +ETHZ/eth01/images/image_00000670_0.png +ETHZ/eth01/images/image_00000671_0.png +ETHZ/eth01/images/image_00000672_0.png +ETHZ/eth01/images/image_00000673_0.png +ETHZ/eth01/images/image_00000674_0.png +ETHZ/eth01/images/image_00000675_0.png +ETHZ/eth01/images/image_00000676_0.png +ETHZ/eth01/images/image_00000677_0.png +ETHZ/eth01/images/image_00000678_0.png +ETHZ/eth01/images/image_00000679_0.png +ETHZ/eth01/images/image_00000680_0.png +ETHZ/eth01/images/image_00000681_0.png +ETHZ/eth01/images/image_00000682_0.png +ETHZ/eth01/images/image_00000683_0.png +ETHZ/eth01/images/image_00000684_0.png +ETHZ/eth01/images/image_00000685_0.png +ETHZ/eth01/images/image_00000686_0.png +ETHZ/eth01/images/image_00000687_0.png +ETHZ/eth01/images/image_00000688_0.png +ETHZ/eth01/images/image_00000689_0.png +ETHZ/eth01/images/image_00000690_0.png +ETHZ/eth01/images/image_00000691_0.png +ETHZ/eth01/images/image_00000692_0.png +ETHZ/eth01/images/image_00000693_0.png +ETHZ/eth01/images/image_00000694_0.png +ETHZ/eth01/images/image_00000695_0.png +ETHZ/eth01/images/image_00000696_0.png +ETHZ/eth01/images/image_00000697_0.png +ETHZ/eth01/images/image_00000698_0.png +ETHZ/eth01/images/image_00000699_0.png +ETHZ/eth01/images/image_00000700_0.png +ETHZ/eth01/images/image_00000701_0.png +ETHZ/eth01/images/image_00000702_0.png +ETHZ/eth01/images/image_00000703_0.png +ETHZ/eth01/images/image_00000704_0.png +ETHZ/eth01/images/image_00000705_0.png +ETHZ/eth01/images/image_00000706_0.png +ETHZ/eth01/images/image_00000707_0.png +ETHZ/eth01/images/image_00000708_0.png +ETHZ/eth01/images/image_00000709_0.png +ETHZ/eth01/images/image_00000710_0.png +ETHZ/eth01/images/image_00000711_0.png +ETHZ/eth01/images/image_00000712_0.png +ETHZ/eth01/images/image_00000713_0.png +ETHZ/eth01/images/image_00000714_0.png +ETHZ/eth01/images/image_00000715_0.png +ETHZ/eth01/images/image_00000716_0.png +ETHZ/eth01/images/image_00000717_0.png 
+ETHZ/eth01/images/image_00000718_0.png +ETHZ/eth01/images/image_00000719_0.png +ETHZ/eth01/images/image_00000720_0.png +ETHZ/eth01/images/image_00000721_0.png +ETHZ/eth01/images/image_00000722_0.png +ETHZ/eth01/images/image_00000723_0.png +ETHZ/eth01/images/image_00000724_0.png +ETHZ/eth01/images/image_00000725_0.png +ETHZ/eth01/images/image_00000726_0.png +ETHZ/eth01/images/image_00000727_0.png +ETHZ/eth01/images/image_00000728_0.png +ETHZ/eth01/images/image_00000729_0.png +ETHZ/eth01/images/image_00000730_0.png +ETHZ/eth01/images/image_00000731_0.png +ETHZ/eth01/images/image_00000732_0.png +ETHZ/eth01/images/image_00000733_0.png +ETHZ/eth01/images/image_00000734_0.png +ETHZ/eth01/images/image_00000735_0.png +ETHZ/eth01/images/image_00000736_0.png +ETHZ/eth01/images/image_00000737_0.png +ETHZ/eth01/images/image_00000738_0.png +ETHZ/eth01/images/image_00000739_0.png +ETHZ/eth01/images/image_00000740_0.png +ETHZ/eth01/images/image_00000741_0.png +ETHZ/eth01/images/image_00000742_0.png +ETHZ/eth01/images/image_00000743_0.png +ETHZ/eth01/images/image_00000744_0.png +ETHZ/eth01/images/image_00000745_0.png +ETHZ/eth01/images/image_00000746_0.png +ETHZ/eth01/images/image_00000747_0.png +ETHZ/eth01/images/image_00000748_0.png +ETHZ/eth01/images/image_00000749_0.png +ETHZ/eth01/images/image_00000750_0.png +ETHZ/eth01/images/image_00000751_0.png +ETHZ/eth01/images/image_00000752_0.png +ETHZ/eth01/images/image_00000753_0.png +ETHZ/eth01/images/image_00000754_0.png +ETHZ/eth01/images/image_00000755_0.png +ETHZ/eth01/images/image_00000756_0.png +ETHZ/eth01/images/image_00000757_0.png +ETHZ/eth01/images/image_00000758_0.png +ETHZ/eth01/images/image_00000759_0.png +ETHZ/eth01/images/image_00000760_0.png +ETHZ/eth01/images/image_00000761_0.png +ETHZ/eth01/images/image_00000762_0.png +ETHZ/eth01/images/image_00000763_0.png +ETHZ/eth01/images/image_00000764_0.png +ETHZ/eth01/images/image_00000765_0.png +ETHZ/eth01/images/image_00000766_0.png +ETHZ/eth01/images/image_00000767_0.png +ETHZ/eth01/images/image_00000768_0.png +ETHZ/eth01/images/image_00000769_0.png +ETHZ/eth01/images/image_00000770_0.png +ETHZ/eth01/images/image_00000771_0.png +ETHZ/eth01/images/image_00000772_0.png +ETHZ/eth01/images/image_00000773_0.png +ETHZ/eth01/images/image_00000774_0.png +ETHZ/eth01/images/image_00000775_0.png +ETHZ/eth01/images/image_00000776_0.png +ETHZ/eth01/images/image_00000777_0.png +ETHZ/eth01/images/image_00000778_0.png +ETHZ/eth01/images/image_00000779_0.png +ETHZ/eth01/images/image_00000780_0.png +ETHZ/eth01/images/image_00000781_0.png +ETHZ/eth01/images/image_00000782_0.png +ETHZ/eth01/images/image_00000783_0.png +ETHZ/eth01/images/image_00000784_0.png +ETHZ/eth01/images/image_00000785_0.png +ETHZ/eth01/images/image_00000786_0.png +ETHZ/eth01/images/image_00000787_0.png +ETHZ/eth01/images/image_00000788_0.png +ETHZ/eth01/images/image_00000789_0.png +ETHZ/eth01/images/image_00000790_0.png +ETHZ/eth01/images/image_00000791_0.png +ETHZ/eth01/images/image_00000792_0.png +ETHZ/eth01/images/image_00000793_0.png +ETHZ/eth01/images/image_00000794_0.png +ETHZ/eth01/images/image_00000795_0.png +ETHZ/eth01/images/image_00000796_0.png +ETHZ/eth01/images/image_00000797_0.png +ETHZ/eth01/images/image_00000798_0.png +ETHZ/eth01/images/image_00000799_0.png +ETHZ/eth01/images/image_00000800_0.png +ETHZ/eth01/images/image_00000801_0.png +ETHZ/eth01/images/image_00000802_0.png +ETHZ/eth01/images/image_00000803_0.png +ETHZ/eth01/images/image_00000804_0.png +ETHZ/eth01/images/image_00000805_0.png 
+ETHZ/eth01/images/image_00000806_0.png +ETHZ/eth01/images/image_00000807_0.png +ETHZ/eth01/images/image_00000808_0.png +ETHZ/eth01/images/image_00000809_0.png +ETHZ/eth01/images/image_00000810_0.png +ETHZ/eth01/images/image_00000811_0.png +ETHZ/eth01/images/image_00000812_0.png +ETHZ/eth01/images/image_00000813_0.png +ETHZ/eth01/images/image_00000814_0.png +ETHZ/eth01/images/image_00000815_0.png +ETHZ/eth01/images/image_00000816_0.png +ETHZ/eth01/images/image_00000817_0.png +ETHZ/eth01/images/image_00000818_0.png +ETHZ/eth01/images/image_00000819_0.png +ETHZ/eth01/images/image_00000820_0.png +ETHZ/eth01/images/image_00000821_0.png +ETHZ/eth01/images/image_00000822_0.png +ETHZ/eth01/images/image_00000823_0.png +ETHZ/eth01/images/image_00000824_0.png +ETHZ/eth01/images/image_00000825_0.png +ETHZ/eth01/images/image_00000826_0.png +ETHZ/eth01/images/image_00000827_0.png +ETHZ/eth01/images/image_00000828_0.png +ETHZ/eth01/images/image_00000829_0.png +ETHZ/eth01/images/image_00000830_0.png +ETHZ/eth01/images/image_00000831_0.png +ETHZ/eth01/images/image_00000832_0.png +ETHZ/eth01/images/image_00000833_0.png +ETHZ/eth01/images/image_00000834_0.png +ETHZ/eth01/images/image_00000835_0.png +ETHZ/eth01/images/image_00000836_0.png +ETHZ/eth01/images/image_00000837_0.png +ETHZ/eth01/images/image_00000838_0.png +ETHZ/eth01/images/image_00000839_0.png +ETHZ/eth01/images/image_00000840_0.png +ETHZ/eth01/images/image_00000841_0.png +ETHZ/eth01/images/image_00000842_0.png +ETHZ/eth01/images/image_00000843_0.png +ETHZ/eth01/images/image_00000844_0.png +ETHZ/eth01/images/image_00000845_0.png +ETHZ/eth01/images/image_00000846_0.png +ETHZ/eth01/images/image_00000847_0.png +ETHZ/eth01/images/image_00000848_0.png +ETHZ/eth01/images/image_00000849_0.png +ETHZ/eth01/images/image_00000850_0.png +ETHZ/eth01/images/image_00000851_0.png +ETHZ/eth01/images/image_00000852_0.png +ETHZ/eth01/images/image_00000853_0.png +ETHZ/eth01/images/image_00000854_0.png +ETHZ/eth01/images/image_00000855_0.png +ETHZ/eth01/images/image_00000856_0.png +ETHZ/eth01/images/image_00000857_0.png +ETHZ/eth01/images/image_00000858_0.png +ETHZ/eth01/images/image_00000859_0.png +ETHZ/eth01/images/image_00000860_0.png +ETHZ/eth01/images/image_00000861_0.png +ETHZ/eth01/images/image_00000862_0.png +ETHZ/eth01/images/image_00000863_0.png +ETHZ/eth01/images/image_00000864_0.png +ETHZ/eth01/images/image_00000865_0.png +ETHZ/eth01/images/image_00000866_0.png +ETHZ/eth01/images/image_00000867_0.png +ETHZ/eth01/images/image_00000868_0.png +ETHZ/eth01/images/image_00000869_0.png +ETHZ/eth01/images/image_00000870_0.png +ETHZ/eth01/images/image_00000871_0.png +ETHZ/eth01/images/image_00000872_0.png +ETHZ/eth01/images/image_00000873_0.png +ETHZ/eth01/images/image_00000874_0.png +ETHZ/eth01/images/image_00000875_0.png +ETHZ/eth01/images/image_00000876_0.png +ETHZ/eth01/images/image_00000877_0.png +ETHZ/eth01/images/image_00000878_0.png +ETHZ/eth01/images/image_00000879_0.png +ETHZ/eth01/images/image_00000880_0.png +ETHZ/eth01/images/image_00000881_0.png +ETHZ/eth01/images/image_00000882_0.png +ETHZ/eth01/images/image_00000883_0.png +ETHZ/eth01/images/image_00000884_0.png +ETHZ/eth01/images/image_00000885_0.png +ETHZ/eth01/images/image_00000886_0.png +ETHZ/eth01/images/image_00000887_0.png +ETHZ/eth01/images/image_00000888_0.png +ETHZ/eth01/images/image_00000889_0.png +ETHZ/eth01/images/image_00000890_0.png +ETHZ/eth01/images/image_00000891_0.png +ETHZ/eth01/images/image_00000892_0.png +ETHZ/eth01/images/image_00000893_0.png 
+ETHZ/eth01/images/image_00000894_0.png +ETHZ/eth01/images/image_00000895_0.png +ETHZ/eth01/images/image_00000896_0.png +ETHZ/eth01/images/image_00000897_0.png +ETHZ/eth01/images/image_00000898_0.png +ETHZ/eth01/images/image_00000899_0.png +ETHZ/eth01/images/image_00000900_0.png +ETHZ/eth01/images/image_00000901_0.png +ETHZ/eth01/images/image_00000902_0.png +ETHZ/eth01/images/image_00000903_0.png +ETHZ/eth01/images/image_00000904_0.png +ETHZ/eth01/images/image_00000905_0.png +ETHZ/eth01/images/image_00000906_0.png +ETHZ/eth01/images/image_00000907_0.png +ETHZ/eth01/images/image_00000908_0.png +ETHZ/eth01/images/image_00000909_0.png +ETHZ/eth01/images/image_00000910_0.png +ETHZ/eth01/images/image_00000911_0.png +ETHZ/eth01/images/image_00000912_0.png +ETHZ/eth01/images/image_00000913_0.png +ETHZ/eth01/images/image_00000914_0.png +ETHZ/eth01/images/image_00000915_0.png +ETHZ/eth01/images/image_00000916_0.png +ETHZ/eth01/images/image_00000917_0.png +ETHZ/eth01/images/image_00000918_0.png +ETHZ/eth01/images/image_00000919_0.png +ETHZ/eth01/images/image_00000920_0.png +ETHZ/eth01/images/image_00000921_0.png +ETHZ/eth01/images/image_00000922_0.png +ETHZ/eth01/images/image_00000923_0.png +ETHZ/eth01/images/image_00000924_0.png +ETHZ/eth01/images/image_00000925_0.png +ETHZ/eth01/images/image_00000926_0.png +ETHZ/eth01/images/image_00000927_0.png +ETHZ/eth01/images/image_00000928_0.png +ETHZ/eth01/images/image_00000929_0.png +ETHZ/eth01/images/image_00000930_0.png +ETHZ/eth01/images/image_00000931_0.png +ETHZ/eth01/images/image_00000932_0.png +ETHZ/eth01/images/image_00000933_0.png +ETHZ/eth01/images/image_00000934_0.png +ETHZ/eth01/images/image_00000935_0.png +ETHZ/eth01/images/image_00000936_0.png +ETHZ/eth01/images/image_00000937_0.png +ETHZ/eth01/images/image_00000938_0.png +ETHZ/eth01/images/image_00000939_0.png +ETHZ/eth01/images/image_00000940_0.png +ETHZ/eth01/images/image_00000941_0.png +ETHZ/eth01/images/image_00000942_0.png +ETHZ/eth01/images/image_00000943_0.png +ETHZ/eth01/images/image_00000944_0.png +ETHZ/eth01/images/image_00000945_0.png +ETHZ/eth01/images/image_00000946_0.png +ETHZ/eth01/images/image_00000947_0.png +ETHZ/eth01/images/image_00000948_0.png +ETHZ/eth01/images/image_00000949_0.png +ETHZ/eth01/images/image_00000950_0.png +ETHZ/eth01/images/image_00000951_0.png +ETHZ/eth01/images/image_00000952_0.png +ETHZ/eth01/images/image_00000953_0.png +ETHZ/eth01/images/image_00000954_0.png +ETHZ/eth01/images/image_00000955_0.png +ETHZ/eth01/images/image_00000956_0.png +ETHZ/eth01/images/image_00000957_0.png +ETHZ/eth01/images/image_00000958_0.png +ETHZ/eth01/images/image_00000959_0.png +ETHZ/eth01/images/image_00000960_0.png +ETHZ/eth01/images/image_00000961_0.png +ETHZ/eth01/images/image_00000962_0.png +ETHZ/eth01/images/image_00000963_0.png +ETHZ/eth01/images/image_00000964_0.png +ETHZ/eth01/images/image_00000965_0.png +ETHZ/eth01/images/image_00000966_0.png +ETHZ/eth01/images/image_00000967_0.png +ETHZ/eth01/images/image_00000968_0.png +ETHZ/eth01/images/image_00000969_0.png +ETHZ/eth01/images/image_00000970_0.png +ETHZ/eth01/images/image_00000971_0.png +ETHZ/eth01/images/image_00000972_0.png +ETHZ/eth01/images/image_00000973_0.png +ETHZ/eth01/images/image_00000974_0.png +ETHZ/eth01/images/image_00000975_0.png +ETHZ/eth01/images/image_00000976_0.png +ETHZ/eth01/images/image_00000977_0.png +ETHZ/eth01/images/image_00000978_0.png +ETHZ/eth01/images/image_00000979_0.png +ETHZ/eth01/images/image_00000980_0.png +ETHZ/eth01/images/image_00000981_0.png 
+ETHZ/eth01/images/image_00000982_0.png +ETHZ/eth01/images/image_00000983_0.png +ETHZ/eth01/images/image_00000984_0.png +ETHZ/eth01/images/image_00000985_0.png +ETHZ/eth01/images/image_00000986_0.png +ETHZ/eth01/images/image_00000987_0.png +ETHZ/eth01/images/image_00000988_0.png +ETHZ/eth01/images/image_00000989_0.png +ETHZ/eth01/images/image_00000990_0.png +ETHZ/eth01/images/image_00000991_0.png +ETHZ/eth01/images/image_00000992_0.png +ETHZ/eth01/images/image_00000993_0.png +ETHZ/eth01/images/image_00000994_0.png +ETHZ/eth01/images/image_00000995_0.png +ETHZ/eth01/images/image_00000996_0.png +ETHZ/eth01/images/image_00000997_0.png +ETHZ/eth01/images/image_00000998_0.png +ETHZ/eth01/images/image_00000999_0.png +ETHZ/eth02/images/image_00000000_0.png +ETHZ/eth02/images/image_00000001_0.png +ETHZ/eth02/images/image_00000002_0.png +ETHZ/eth02/images/image_00000003_0.png +ETHZ/eth02/images/image_00000004_0.png +ETHZ/eth02/images/image_00000005_0.png +ETHZ/eth02/images/image_00000006_0.png +ETHZ/eth02/images/image_00000007_0.png +ETHZ/eth02/images/image_00000008_0.png +ETHZ/eth02/images/image_00000009_0.png +ETHZ/eth02/images/image_00000010_0.png +ETHZ/eth02/images/image_00000011_0.png +ETHZ/eth02/images/image_00000012_0.png +ETHZ/eth02/images/image_00000013_0.png +ETHZ/eth02/images/image_00000014_0.png +ETHZ/eth02/images/image_00000015_0.png +ETHZ/eth02/images/image_00000016_0.png +ETHZ/eth02/images/image_00000017_0.png +ETHZ/eth02/images/image_00000018_0.png +ETHZ/eth02/images/image_00000019_0.png +ETHZ/eth02/images/image_00000020_0.png +ETHZ/eth02/images/image_00000021_0.png +ETHZ/eth02/images/image_00000022_0.png +ETHZ/eth02/images/image_00000023_0.png +ETHZ/eth02/images/image_00000024_0.png +ETHZ/eth02/images/image_00000025_0.png +ETHZ/eth02/images/image_00000026_0.png +ETHZ/eth02/images/image_00000027_0.png +ETHZ/eth02/images/image_00000028_0.png +ETHZ/eth02/images/image_00000029_0.png +ETHZ/eth02/images/image_00000030_0.png +ETHZ/eth02/images/image_00000031_0.png +ETHZ/eth02/images/image_00000032_0.png +ETHZ/eth02/images/image_00000033_0.png +ETHZ/eth02/images/image_00000034_0.png +ETHZ/eth02/images/image_00000035_0.png +ETHZ/eth02/images/image_00000036_0.png +ETHZ/eth02/images/image_00000037_0.png +ETHZ/eth02/images/image_00000038_0.png +ETHZ/eth02/images/image_00000039_0.png +ETHZ/eth02/images/image_00000040_0.png +ETHZ/eth02/images/image_00000041_0.png +ETHZ/eth02/images/image_00000042_0.png +ETHZ/eth02/images/image_00000043_0.png +ETHZ/eth02/images/image_00000044_0.png +ETHZ/eth02/images/image_00000045_0.png +ETHZ/eth02/images/image_00000046_0.png +ETHZ/eth02/images/image_00000047_0.png +ETHZ/eth02/images/image_00000048_0.png +ETHZ/eth02/images/image_00000049_0.png +ETHZ/eth02/images/image_00000050_0.png +ETHZ/eth02/images/image_00000051_0.png +ETHZ/eth02/images/image_00000052_0.png +ETHZ/eth02/images/image_00000053_0.png +ETHZ/eth02/images/image_00000054_0.png +ETHZ/eth02/images/image_00000055_0.png +ETHZ/eth02/images/image_00000056_0.png +ETHZ/eth02/images/image_00000057_0.png +ETHZ/eth02/images/image_00000058_0.png +ETHZ/eth02/images/image_00000059_0.png +ETHZ/eth02/images/image_00000060_0.png +ETHZ/eth02/images/image_00000061_0.png +ETHZ/eth02/images/image_00000062_0.png +ETHZ/eth02/images/image_00000063_0.png +ETHZ/eth02/images/image_00000064_0.png +ETHZ/eth02/images/image_00000065_0.png +ETHZ/eth02/images/image_00000066_0.png +ETHZ/eth02/images/image_00000067_0.png +ETHZ/eth02/images/image_00000068_0.png +ETHZ/eth02/images/image_00000069_0.png 
+ETHZ/eth02/images/image_00000070_0.png +ETHZ/eth02/images/image_00000071_0.png +ETHZ/eth02/images/image_00000072_0.png +ETHZ/eth02/images/image_00000073_0.png +ETHZ/eth02/images/image_00000074_0.png +ETHZ/eth02/images/image_00000075_0.png +ETHZ/eth02/images/image_00000076_0.png +ETHZ/eth02/images/image_00000077_0.png +ETHZ/eth02/images/image_00000078_0.png +ETHZ/eth02/images/image_00000079_0.png +ETHZ/eth02/images/image_00000080_0.png +ETHZ/eth02/images/image_00000081_0.png +ETHZ/eth02/images/image_00000082_0.png +ETHZ/eth02/images/image_00000083_0.png +ETHZ/eth02/images/image_00000084_0.png +ETHZ/eth02/images/image_00000085_0.png +ETHZ/eth02/images/image_00000086_0.png +ETHZ/eth02/images/image_00000087_0.png +ETHZ/eth02/images/image_00000088_0.png +ETHZ/eth02/images/image_00000089_0.png +ETHZ/eth02/images/image_00000090_0.png +ETHZ/eth02/images/image_00000091_0.png +ETHZ/eth02/images/image_00000092_0.png +ETHZ/eth02/images/image_00000093_0.png +ETHZ/eth02/images/image_00000094_0.png +ETHZ/eth02/images/image_00000095_0.png +ETHZ/eth02/images/image_00000096_0.png +ETHZ/eth02/images/image_00000097_0.png +ETHZ/eth02/images/image_00000098_0.png +ETHZ/eth02/images/image_00000099_0.png +ETHZ/eth02/images/image_00000100_0.png +ETHZ/eth02/images/image_00000101_0.png +ETHZ/eth02/images/image_00000102_0.png +ETHZ/eth02/images/image_00000103_0.png +ETHZ/eth02/images/image_00000104_0.png +ETHZ/eth02/images/image_00000105_0.png +ETHZ/eth02/images/image_00000106_0.png +ETHZ/eth02/images/image_00000107_0.png +ETHZ/eth02/images/image_00000108_0.png +ETHZ/eth02/images/image_00000109_0.png +ETHZ/eth02/images/image_00000110_0.png +ETHZ/eth02/images/image_00000111_0.png +ETHZ/eth02/images/image_00000112_0.png +ETHZ/eth02/images/image_00000113_0.png +ETHZ/eth02/images/image_00000114_0.png +ETHZ/eth02/images/image_00000115_0.png +ETHZ/eth02/images/image_00000116_0.png +ETHZ/eth02/images/image_00000117_0.png +ETHZ/eth02/images/image_00000118_0.png +ETHZ/eth02/images/image_00000119_0.png +ETHZ/eth02/images/image_00000120_0.png +ETHZ/eth02/images/image_00000121_0.png +ETHZ/eth02/images/image_00000122_0.png +ETHZ/eth02/images/image_00000123_0.png +ETHZ/eth02/images/image_00000124_0.png +ETHZ/eth02/images/image_00000125_0.png +ETHZ/eth02/images/image_00000126_0.png +ETHZ/eth02/images/image_00000127_0.png +ETHZ/eth02/images/image_00000128_0.png +ETHZ/eth02/images/image_00000129_0.png +ETHZ/eth02/images/image_00000130_0.png +ETHZ/eth02/images/image_00000131_0.png +ETHZ/eth02/images/image_00000132_0.png +ETHZ/eth02/images/image_00000133_0.png +ETHZ/eth02/images/image_00000134_0.png +ETHZ/eth02/images/image_00000135_0.png +ETHZ/eth02/images/image_00000136_0.png +ETHZ/eth02/images/image_00000137_0.png +ETHZ/eth02/images/image_00000138_0.png +ETHZ/eth02/images/image_00000139_0.png +ETHZ/eth02/images/image_00000140_0.png +ETHZ/eth02/images/image_00000141_0.png +ETHZ/eth02/images/image_00000142_0.png +ETHZ/eth02/images/image_00000143_0.png +ETHZ/eth02/images/image_00000144_0.png +ETHZ/eth02/images/image_00000145_0.png +ETHZ/eth02/images/image_00000146_0.png +ETHZ/eth02/images/image_00000147_0.png +ETHZ/eth02/images/image_00000148_0.png +ETHZ/eth02/images/image_00000149_0.png +ETHZ/eth02/images/image_00000150_0.png +ETHZ/eth02/images/image_00000151_0.png +ETHZ/eth02/images/image_00000152_0.png +ETHZ/eth02/images/image_00000153_0.png +ETHZ/eth02/images/image_00000154_0.png +ETHZ/eth02/images/image_00000155_0.png +ETHZ/eth02/images/image_00000156_0.png +ETHZ/eth02/images/image_00000157_0.png 
+ETHZ/eth02/images/image_00000158_0.png +ETHZ/eth02/images/image_00000159_0.png +ETHZ/eth02/images/image_00000160_0.png +ETHZ/eth02/images/image_00000161_0.png +ETHZ/eth02/images/image_00000162_0.png +ETHZ/eth02/images/image_00000163_0.png +ETHZ/eth02/images/image_00000164_0.png +ETHZ/eth02/images/image_00000165_0.png +ETHZ/eth02/images/image_00000166_0.png +ETHZ/eth02/images/image_00000167_0.png +ETHZ/eth02/images/image_00000168_0.png +ETHZ/eth02/images/image_00000169_0.png +ETHZ/eth02/images/image_00000170_0.png +ETHZ/eth02/images/image_00000171_0.png +ETHZ/eth02/images/image_00000172_0.png +ETHZ/eth02/images/image_00000173_0.png +ETHZ/eth02/images/image_00000174_0.png +ETHZ/eth02/images/image_00000175_0.png +ETHZ/eth02/images/image_00000176_0.png +ETHZ/eth02/images/image_00000177_0.png +ETHZ/eth02/images/image_00000178_0.png +ETHZ/eth02/images/image_00000179_0.png +ETHZ/eth02/images/image_00000180_0.png +ETHZ/eth02/images/image_00000181_0.png +ETHZ/eth02/images/image_00000182_0.png +ETHZ/eth02/images/image_00000183_0.png +ETHZ/eth02/images/image_00000184_0.png +ETHZ/eth02/images/image_00000185_0.png +ETHZ/eth02/images/image_00000186_0.png +ETHZ/eth02/images/image_00000187_0.png +ETHZ/eth02/images/image_00000188_0.png +ETHZ/eth02/images/image_00000189_0.png +ETHZ/eth02/images/image_00000190_0.png +ETHZ/eth02/images/image_00000191_0.png +ETHZ/eth02/images/image_00000192_0.png +ETHZ/eth02/images/image_00000193_0.png +ETHZ/eth02/images/image_00000194_0.png +ETHZ/eth02/images/image_00000195_0.png +ETHZ/eth02/images/image_00000196_0.png +ETHZ/eth02/images/image_00000197_0.png +ETHZ/eth02/images/image_00000198_0.png +ETHZ/eth02/images/image_00000199_0.png +ETHZ/eth02/images/image_00000200_0.png +ETHZ/eth02/images/image_00000201_0.png +ETHZ/eth02/images/image_00000202_0.png +ETHZ/eth02/images/image_00000203_0.png +ETHZ/eth02/images/image_00000204_0.png +ETHZ/eth02/images/image_00000205_0.png +ETHZ/eth02/images/image_00000206_0.png +ETHZ/eth02/images/image_00000207_0.png +ETHZ/eth02/images/image_00000208_0.png +ETHZ/eth02/images/image_00000209_0.png +ETHZ/eth02/images/image_00000210_0.png +ETHZ/eth02/images/image_00000211_0.png +ETHZ/eth02/images/image_00000212_0.png +ETHZ/eth02/images/image_00000213_0.png +ETHZ/eth02/images/image_00000214_0.png +ETHZ/eth02/images/image_00000215_0.png +ETHZ/eth02/images/image_00000216_0.png +ETHZ/eth02/images/image_00000217_0.png +ETHZ/eth02/images/image_00000218_0.png +ETHZ/eth02/images/image_00000219_0.png +ETHZ/eth02/images/image_00000220_0.png +ETHZ/eth02/images/image_00000221_0.png +ETHZ/eth02/images/image_00000222_0.png +ETHZ/eth02/images/image_00000223_0.png +ETHZ/eth02/images/image_00000224_0.png +ETHZ/eth02/images/image_00000225_0.png +ETHZ/eth02/images/image_00000226_0.png +ETHZ/eth02/images/image_00000227_0.png +ETHZ/eth02/images/image_00000228_0.png +ETHZ/eth02/images/image_00000229_0.png +ETHZ/eth02/images/image_00000230_0.png +ETHZ/eth02/images/image_00000231_0.png +ETHZ/eth02/images/image_00000232_0.png +ETHZ/eth02/images/image_00000233_0.png +ETHZ/eth02/images/image_00000234_0.png +ETHZ/eth02/images/image_00000235_0.png +ETHZ/eth02/images/image_00000236_0.png +ETHZ/eth02/images/image_00000237_0.png +ETHZ/eth02/images/image_00000238_0.png +ETHZ/eth02/images/image_00000239_0.png +ETHZ/eth02/images/image_00000240_0.png +ETHZ/eth02/images/image_00000241_0.png +ETHZ/eth02/images/image_00000242_0.png +ETHZ/eth02/images/image_00000243_0.png +ETHZ/eth02/images/image_00000244_0.png +ETHZ/eth02/images/image_00000245_0.png 
+ETHZ/eth02/images/image_00000246_0.png +ETHZ/eth02/images/image_00000247_0.png +ETHZ/eth02/images/image_00000248_0.png +ETHZ/eth02/images/image_00000249_0.png +ETHZ/eth02/images/image_00000250_0.png +ETHZ/eth02/images/image_00000251_0.png +ETHZ/eth02/images/image_00000252_0.png +ETHZ/eth02/images/image_00000253_0.png +ETHZ/eth02/images/image_00000254_0.png +ETHZ/eth02/images/image_00000255_0.png +ETHZ/eth02/images/image_00000256_0.png +ETHZ/eth02/images/image_00000257_0.png +ETHZ/eth02/images/image_00000258_0.png +ETHZ/eth02/images/image_00000259_0.png +ETHZ/eth02/images/image_00000260_0.png +ETHZ/eth02/images/image_00000261_0.png +ETHZ/eth02/images/image_00000262_0.png +ETHZ/eth02/images/image_00000263_0.png +ETHZ/eth02/images/image_00000264_0.png +ETHZ/eth02/images/image_00000265_0.png +ETHZ/eth02/images/image_00000266_0.png +ETHZ/eth02/images/image_00000267_0.png +ETHZ/eth02/images/image_00000268_0.png +ETHZ/eth02/images/image_00000269_0.png +ETHZ/eth02/images/image_00000270_0.png +ETHZ/eth02/images/image_00000271_0.png +ETHZ/eth02/images/image_00000272_0.png +ETHZ/eth02/images/image_00000273_0.png +ETHZ/eth02/images/image_00000274_0.png +ETHZ/eth02/images/image_00000275_0.png +ETHZ/eth02/images/image_00000276_0.png +ETHZ/eth02/images/image_00000277_0.png +ETHZ/eth02/images/image_00000278_0.png +ETHZ/eth02/images/image_00000279_0.png +ETHZ/eth02/images/image_00000280_0.png +ETHZ/eth02/images/image_00000281_0.png +ETHZ/eth02/images/image_00000282_0.png +ETHZ/eth02/images/image_00000283_0.png +ETHZ/eth02/images/image_00000284_0.png +ETHZ/eth02/images/image_00000285_0.png +ETHZ/eth02/images/image_00000286_0.png +ETHZ/eth02/images/image_00000287_0.png +ETHZ/eth02/images/image_00000288_0.png +ETHZ/eth02/images/image_00000289_0.png +ETHZ/eth02/images/image_00000290_0.png +ETHZ/eth02/images/image_00000291_0.png +ETHZ/eth02/images/image_00000292_0.png +ETHZ/eth02/images/image_00000293_0.png +ETHZ/eth02/images/image_00000294_0.png +ETHZ/eth02/images/image_00000295_0.png +ETHZ/eth02/images/image_00000296_0.png +ETHZ/eth02/images/image_00000297_0.png +ETHZ/eth02/images/image_00000298_0.png +ETHZ/eth02/images/image_00000299_0.png +ETHZ/eth02/images/image_00000300_0.png +ETHZ/eth02/images/image_00000301_0.png +ETHZ/eth02/images/image_00000302_0.png +ETHZ/eth02/images/image_00000303_0.png +ETHZ/eth02/images/image_00000304_0.png +ETHZ/eth02/images/image_00000305_0.png +ETHZ/eth02/images/image_00000306_0.png +ETHZ/eth02/images/image_00000307_0.png +ETHZ/eth02/images/image_00000308_0.png +ETHZ/eth02/images/image_00000309_0.png +ETHZ/eth02/images/image_00000310_0.png +ETHZ/eth02/images/image_00000311_0.png +ETHZ/eth02/images/image_00000312_0.png +ETHZ/eth02/images/image_00000313_0.png +ETHZ/eth02/images/image_00000314_0.png +ETHZ/eth02/images/image_00000315_0.png +ETHZ/eth02/images/image_00000316_0.png +ETHZ/eth02/images/image_00000317_0.png +ETHZ/eth02/images/image_00000318_0.png +ETHZ/eth02/images/image_00000319_0.png +ETHZ/eth02/images/image_00000320_0.png +ETHZ/eth02/images/image_00000321_0.png +ETHZ/eth02/images/image_00000322_0.png +ETHZ/eth02/images/image_00000323_0.png +ETHZ/eth02/images/image_00000324_0.png +ETHZ/eth02/images/image_00000325_0.png +ETHZ/eth02/images/image_00000326_0.png +ETHZ/eth02/images/image_00000327_0.png +ETHZ/eth02/images/image_00000328_0.png +ETHZ/eth02/images/image_00000329_0.png +ETHZ/eth02/images/image_00000330_0.png +ETHZ/eth02/images/image_00000331_0.png +ETHZ/eth02/images/image_00000332_0.png +ETHZ/eth02/images/image_00000333_0.png 
+ETHZ/eth02/images/image_00000334_0.png +ETHZ/eth02/images/image_00000335_0.png +ETHZ/eth02/images/image_00000336_0.png +ETHZ/eth02/images/image_00000337_0.png +ETHZ/eth02/images/image_00000338_0.png +ETHZ/eth02/images/image_00000339_0.png +ETHZ/eth02/images/image_00000340_0.png +ETHZ/eth02/images/image_00000341_0.png +ETHZ/eth02/images/image_00000342_0.png +ETHZ/eth02/images/image_00000343_0.png +ETHZ/eth02/images/image_00000344_0.png +ETHZ/eth02/images/image_00000345_0.png +ETHZ/eth02/images/image_00000346_0.png +ETHZ/eth02/images/image_00000347_0.png +ETHZ/eth02/images/image_00000348_0.png +ETHZ/eth02/images/image_00000349_0.png +ETHZ/eth02/images/image_00000350_0.png +ETHZ/eth02/images/image_00000351_0.png +ETHZ/eth02/images/image_00000352_0.png +ETHZ/eth02/images/image_00000353_0.png +ETHZ/eth02/images/image_00000354_0.png +ETHZ/eth02/images/image_00000355_0.png +ETHZ/eth02/images/image_00000356_0.png +ETHZ/eth02/images/image_00000357_0.png +ETHZ/eth02/images/image_00000358_0.png +ETHZ/eth02/images/image_00000359_0.png +ETHZ/eth02/images/image_00000360_0.png +ETHZ/eth02/images/image_00000361_0.png +ETHZ/eth02/images/image_00000362_0.png +ETHZ/eth02/images/image_00000363_0.png +ETHZ/eth02/images/image_00000364_0.png +ETHZ/eth02/images/image_00000365_0.png +ETHZ/eth02/images/image_00000366_0.png +ETHZ/eth02/images/image_00000367_0.png +ETHZ/eth02/images/image_00000368_0.png +ETHZ/eth02/images/image_00000369_0.png +ETHZ/eth02/images/image_00000370_0.png +ETHZ/eth02/images/image_00000371_0.png +ETHZ/eth02/images/image_00000372_0.png +ETHZ/eth02/images/image_00000373_0.png +ETHZ/eth02/images/image_00000374_0.png +ETHZ/eth02/images/image_00000375_0.png +ETHZ/eth02/images/image_00000376_0.png +ETHZ/eth02/images/image_00000377_0.png +ETHZ/eth02/images/image_00000378_0.png +ETHZ/eth02/images/image_00000379_0.png +ETHZ/eth02/images/image_00000380_0.png +ETHZ/eth02/images/image_00000381_0.png +ETHZ/eth02/images/image_00000382_0.png +ETHZ/eth02/images/image_00000383_0.png +ETHZ/eth02/images/image_00000384_0.png +ETHZ/eth02/images/image_00000385_0.png +ETHZ/eth02/images/image_00000386_0.png +ETHZ/eth02/images/image_00000387_0.png +ETHZ/eth02/images/image_00000388_0.png +ETHZ/eth02/images/image_00000389_0.png +ETHZ/eth02/images/image_00000390_0.png +ETHZ/eth02/images/image_00000391_0.png +ETHZ/eth02/images/image_00000392_0.png +ETHZ/eth02/images/image_00000393_0.png +ETHZ/eth02/images/image_00000394_0.png +ETHZ/eth02/images/image_00000395_0.png +ETHZ/eth02/images/image_00000396_0.png +ETHZ/eth02/images/image_00000397_0.png +ETHZ/eth02/images/image_00000398_0.png +ETHZ/eth02/images/image_00000399_0.png +ETHZ/eth02/images/image_00000400_0.png +ETHZ/eth02/images/image_00000401_0.png +ETHZ/eth02/images/image_00000402_0.png +ETHZ/eth02/images/image_00000403_0.png +ETHZ/eth02/images/image_00000404_0.png +ETHZ/eth02/images/image_00000405_0.png +ETHZ/eth02/images/image_00000406_0.png +ETHZ/eth02/images/image_00000407_0.png +ETHZ/eth02/images/image_00000408_0.png +ETHZ/eth02/images/image_00000409_0.png +ETHZ/eth02/images/image_00000410_0.png +ETHZ/eth02/images/image_00000411_0.png +ETHZ/eth02/images/image_00000412_0.png +ETHZ/eth02/images/image_00000413_0.png +ETHZ/eth02/images/image_00000414_0.png +ETHZ/eth02/images/image_00000415_0.png +ETHZ/eth02/images/image_00000416_0.png +ETHZ/eth02/images/image_00000417_0.png +ETHZ/eth02/images/image_00000418_0.png +ETHZ/eth02/images/image_00000419_0.png +ETHZ/eth02/images/image_00000420_0.png +ETHZ/eth02/images/image_00000421_0.png 
+ETHZ/eth02/images/image_00000422_0.png +ETHZ/eth02/images/image_00000423_0.png +ETHZ/eth02/images/image_00000424_0.png +ETHZ/eth02/images/image_00000425_0.png +ETHZ/eth02/images/image_00000426_0.png +ETHZ/eth02/images/image_00000427_0.png +ETHZ/eth02/images/image_00000428_0.png +ETHZ/eth02/images/image_00000429_0.png +ETHZ/eth02/images/image_00000430_0.png +ETHZ/eth02/images/image_00000431_0.png +ETHZ/eth02/images/image_00000432_0.png +ETHZ/eth02/images/image_00000433_0.png +ETHZ/eth02/images/image_00000434_0.png +ETHZ/eth02/images/image_00000435_0.png +ETHZ/eth02/images/image_00000436_0.png +ETHZ/eth02/images/image_00000437_0.png +ETHZ/eth02/images/image_00000438_0.png +ETHZ/eth02/images/image_00000439_0.png +ETHZ/eth02/images/image_00000440_0.png +ETHZ/eth02/images/image_00000441_0.png +ETHZ/eth02/images/image_00000442_0.png +ETHZ/eth02/images/image_00000443_0.png +ETHZ/eth02/images/image_00000444_0.png +ETHZ/eth02/images/image_00000445_0.png +ETHZ/eth03/images/image_00000100_0.png +ETHZ/eth03/images/image_00000101_0.png +ETHZ/eth03/images/image_00000102_0.png +ETHZ/eth03/images/image_00000103_0.png +ETHZ/eth03/images/image_00000104_0.png +ETHZ/eth03/images/image_00000105_0.png +ETHZ/eth03/images/image_00000106_0.png +ETHZ/eth03/images/image_00000107_0.png +ETHZ/eth03/images/image_00000108_0.png +ETHZ/eth03/images/image_00000109_0.png +ETHZ/eth03/images/image_00000110_0.png +ETHZ/eth03/images/image_00000111_0.png +ETHZ/eth03/images/image_00000112_0.png +ETHZ/eth03/images/image_00000113_0.png +ETHZ/eth03/images/image_00000114_0.png +ETHZ/eth03/images/image_00000115_0.png +ETHZ/eth03/images/image_00000116_0.png +ETHZ/eth03/images/image_00000117_0.png +ETHZ/eth03/images/image_00000118_0.png +ETHZ/eth03/images/image_00000119_0.png +ETHZ/eth03/images/image_00000120_0.png +ETHZ/eth03/images/image_00000121_0.png +ETHZ/eth03/images/image_00000122_0.png +ETHZ/eth03/images/image_00000123_0.png +ETHZ/eth03/images/image_00000124_0.png +ETHZ/eth03/images/image_00000125_0.png +ETHZ/eth03/images/image_00000126_0.png +ETHZ/eth03/images/image_00000127_0.png +ETHZ/eth03/images/image_00000128_0.png +ETHZ/eth03/images/image_00000129_0.png +ETHZ/eth03/images/image_00000130_0.png +ETHZ/eth03/images/image_00000131_0.png +ETHZ/eth03/images/image_00000132_0.png +ETHZ/eth03/images/image_00000133_0.png +ETHZ/eth03/images/image_00000134_0.png +ETHZ/eth03/images/image_00000135_0.png +ETHZ/eth03/images/image_00000136_0.png +ETHZ/eth03/images/image_00000137_0.png +ETHZ/eth03/images/image_00000138_0.png +ETHZ/eth03/images/image_00000139_0.png +ETHZ/eth03/images/image_00000140_0.png +ETHZ/eth03/images/image_00000141_0.png +ETHZ/eth03/images/image_00000142_0.png +ETHZ/eth03/images/image_00000143_0.png +ETHZ/eth03/images/image_00000144_0.png +ETHZ/eth03/images/image_00000145_0.png +ETHZ/eth03/images/image_00000146_0.png +ETHZ/eth03/images/image_00000147_0.png +ETHZ/eth03/images/image_00000148_0.png +ETHZ/eth03/images/image_00000149_0.png +ETHZ/eth03/images/image_00000150_0.png +ETHZ/eth03/images/image_00000151_0.png +ETHZ/eth03/images/image_00000152_0.png +ETHZ/eth03/images/image_00000153_0.png +ETHZ/eth03/images/image_00000154_0.png +ETHZ/eth03/images/image_00000155_0.png +ETHZ/eth03/images/image_00000156_0.png +ETHZ/eth03/images/image_00000157_0.png +ETHZ/eth03/images/image_00000158_0.png +ETHZ/eth03/images/image_00000159_0.png +ETHZ/eth03/images/image_00000160_0.png +ETHZ/eth03/images/image_00000161_0.png +ETHZ/eth03/images/image_00000162_0.png +ETHZ/eth03/images/image_00000163_0.png 
+ETHZ/eth03/images/image_00000164_0.png +ETHZ/eth03/images/image_00000165_0.png +ETHZ/eth03/images/image_00000166_0.png +ETHZ/eth03/images/image_00000167_0.png +ETHZ/eth03/images/image_00000168_0.png +ETHZ/eth03/images/image_00000169_0.png +ETHZ/eth03/images/image_00000170_0.png +ETHZ/eth03/images/image_00000171_0.png +ETHZ/eth03/images/image_00000172_0.png +ETHZ/eth03/images/image_00000173_0.png +ETHZ/eth03/images/image_00000174_0.png +ETHZ/eth03/images/image_00000175_0.png +ETHZ/eth03/images/image_00000176_0.png +ETHZ/eth03/images/image_00000177_0.png +ETHZ/eth03/images/image_00000178_0.png +ETHZ/eth03/images/image_00000179_0.png +ETHZ/eth03/images/image_00000180_0.png +ETHZ/eth03/images/image_00000181_0.png +ETHZ/eth03/images/image_00000182_0.png +ETHZ/eth03/images/image_00000183_0.png +ETHZ/eth03/images/image_00000184_0.png +ETHZ/eth03/images/image_00000185_0.png +ETHZ/eth03/images/image_00000186_0.png +ETHZ/eth03/images/image_00000187_0.png +ETHZ/eth03/images/image_00000188_0.png +ETHZ/eth03/images/image_00000189_0.png +ETHZ/eth03/images/image_00000190_0.png +ETHZ/eth03/images/image_00000191_0.png +ETHZ/eth03/images/image_00000192_0.png +ETHZ/eth03/images/image_00000193_0.png +ETHZ/eth03/images/image_00000194_0.png +ETHZ/eth03/images/image_00000195_0.png +ETHZ/eth03/images/image_00000196_0.png +ETHZ/eth03/images/image_00000197_0.png +ETHZ/eth03/images/image_00000198_0.png +ETHZ/eth03/images/image_00000199_0.png +ETHZ/eth03/images/image_00000200_0.png +ETHZ/eth03/images/image_00000201_0.png +ETHZ/eth03/images/image_00000202_0.png +ETHZ/eth03/images/image_00000203_0.png +ETHZ/eth03/images/image_00000204_0.png +ETHZ/eth03/images/image_00000205_0.png +ETHZ/eth03/images/image_00000206_0.png +ETHZ/eth03/images/image_00000207_0.png +ETHZ/eth03/images/image_00000208_0.png +ETHZ/eth03/images/image_00000209_0.png +ETHZ/eth03/images/image_00000210_0.png +ETHZ/eth03/images/image_00000211_0.png +ETHZ/eth03/images/image_00000212_0.png +ETHZ/eth03/images/image_00000213_0.png +ETHZ/eth03/images/image_00000214_0.png +ETHZ/eth03/images/image_00000215_0.png +ETHZ/eth03/images/image_00000216_0.png +ETHZ/eth03/images/image_00000217_0.png +ETHZ/eth03/images/image_00000218_0.png +ETHZ/eth03/images/image_00000219_0.png +ETHZ/eth03/images/image_00000220_0.png +ETHZ/eth03/images/image_00000221_0.png +ETHZ/eth03/images/image_00000222_0.png +ETHZ/eth03/images/image_00000223_0.png +ETHZ/eth03/images/image_00000224_0.png +ETHZ/eth03/images/image_00000225_0.png +ETHZ/eth03/images/image_00000226_0.png +ETHZ/eth03/images/image_00000227_0.png +ETHZ/eth03/images/image_00000228_0.png +ETHZ/eth03/images/image_00000229_0.png +ETHZ/eth03/images/image_00000230_0.png +ETHZ/eth03/images/image_00000231_0.png +ETHZ/eth03/images/image_00000232_0.png +ETHZ/eth03/images/image_00000233_0.png +ETHZ/eth03/images/image_00000234_0.png +ETHZ/eth03/images/image_00000235_0.png +ETHZ/eth03/images/image_00000236_0.png +ETHZ/eth03/images/image_00000237_0.png +ETHZ/eth03/images/image_00000238_0.png +ETHZ/eth03/images/image_00000239_0.png +ETHZ/eth03/images/image_00000240_0.png +ETHZ/eth03/images/image_00000241_0.png +ETHZ/eth03/images/image_00000242_0.png +ETHZ/eth03/images/image_00000243_0.png +ETHZ/eth03/images/image_00000244_0.png +ETHZ/eth03/images/image_00000245_0.png +ETHZ/eth03/images/image_00000246_0.png +ETHZ/eth03/images/image_00000247_0.png +ETHZ/eth03/images/image_00000248_0.png +ETHZ/eth03/images/image_00000249_0.png +ETHZ/eth03/images/image_00000250_0.png +ETHZ/eth03/images/image_00000251_0.png 
+ETHZ/eth03/images/image_00000252_0.png +ETHZ/eth03/images/image_00000253_0.png +ETHZ/eth03/images/image_00000254_0.png +ETHZ/eth03/images/image_00000255_0.png +ETHZ/eth03/images/image_00000256_0.png +ETHZ/eth03/images/image_00000257_0.png +ETHZ/eth03/images/image_00000258_0.png +ETHZ/eth03/images/image_00000259_0.png +ETHZ/eth03/images/image_00000260_0.png +ETHZ/eth03/images/image_00000261_0.png +ETHZ/eth03/images/image_00000262_0.png +ETHZ/eth03/images/image_00000263_0.png +ETHZ/eth03/images/image_00000264_0.png +ETHZ/eth03/images/image_00000265_0.png +ETHZ/eth03/images/image_00000266_0.png +ETHZ/eth03/images/image_00000267_0.png +ETHZ/eth03/images/image_00000268_0.png +ETHZ/eth03/images/image_00000269_0.png +ETHZ/eth03/images/image_00000270_0.png +ETHZ/eth03/images/image_00000271_0.png +ETHZ/eth03/images/image_00000272_0.png +ETHZ/eth03/images/image_00000273_0.png +ETHZ/eth03/images/image_00000274_0.png +ETHZ/eth03/images/image_00000275_0.png +ETHZ/eth03/images/image_00000276_0.png +ETHZ/eth03/images/image_00000277_0.png +ETHZ/eth03/images/image_00000278_0.png +ETHZ/eth03/images/image_00000279_0.png +ETHZ/eth03/images/image_00000280_0.png +ETHZ/eth03/images/image_00000281_0.png +ETHZ/eth03/images/image_00000282_0.png +ETHZ/eth03/images/image_00000283_0.png +ETHZ/eth03/images/image_00000284_0.png +ETHZ/eth03/images/image_00000285_0.png +ETHZ/eth03/images/image_00000286_0.png +ETHZ/eth03/images/image_00000287_0.png +ETHZ/eth03/images/image_00000288_0.png +ETHZ/eth03/images/image_00000289_0.png +ETHZ/eth03/images/image_00000290_0.png +ETHZ/eth03/images/image_00000291_0.png +ETHZ/eth03/images/image_00000292_0.png +ETHZ/eth03/images/image_00000293_0.png +ETHZ/eth03/images/image_00000294_0.png +ETHZ/eth03/images/image_00000295_0.png +ETHZ/eth03/images/image_00000296_0.png +ETHZ/eth03/images/image_00000297_0.png +ETHZ/eth03/images/image_00000298_0.png +ETHZ/eth03/images/image_00000299_0.png +ETHZ/eth03/images/image_00000300_0.png +ETHZ/eth03/images/image_00000301_0.png +ETHZ/eth03/images/image_00000302_0.png +ETHZ/eth03/images/image_00000303_0.png +ETHZ/eth03/images/image_00000304_0.png +ETHZ/eth03/images/image_00000305_0.png +ETHZ/eth03/images/image_00000306_0.png +ETHZ/eth03/images/image_00000307_0.png +ETHZ/eth03/images/image_00000308_0.png +ETHZ/eth03/images/image_00000309_0.png +ETHZ/eth03/images/image_00000310_0.png +ETHZ/eth03/images/image_00000311_0.png +ETHZ/eth03/images/image_00000312_0.png +ETHZ/eth03/images/image_00000313_0.png +ETHZ/eth03/images/image_00000314_0.png +ETHZ/eth03/images/image_00000315_0.png +ETHZ/eth03/images/image_00000316_0.png +ETHZ/eth03/images/image_00000317_0.png +ETHZ/eth03/images/image_00000318_0.png +ETHZ/eth03/images/image_00000319_0.png +ETHZ/eth03/images/image_00000320_0.png +ETHZ/eth03/images/image_00000321_0.png +ETHZ/eth03/images/image_00000322_0.png +ETHZ/eth03/images/image_00000323_0.png +ETHZ/eth03/images/image_00000324_0.png +ETHZ/eth03/images/image_00000325_0.png +ETHZ/eth03/images/image_00000326_0.png +ETHZ/eth03/images/image_00000327_0.png +ETHZ/eth03/images/image_00000328_0.png +ETHZ/eth03/images/image_00000329_0.png +ETHZ/eth03/images/image_00000330_0.png +ETHZ/eth03/images/image_00000331_0.png +ETHZ/eth03/images/image_00000332_0.png +ETHZ/eth03/images/image_00000333_0.png +ETHZ/eth03/images/image_00000334_0.png +ETHZ/eth03/images/image_00000335_0.png +ETHZ/eth03/images/image_00000336_0.png +ETHZ/eth03/images/image_00000337_0.png +ETHZ/eth03/images/image_00000338_0.png +ETHZ/eth03/images/image_00000339_0.png 
+ETHZ/eth03/images/image_00000340_0.png +ETHZ/eth03/images/image_00000341_0.png +ETHZ/eth03/images/image_00000342_0.png +ETHZ/eth03/images/image_00000343_0.png +ETHZ/eth03/images/image_00000344_0.png +ETHZ/eth03/images/image_00000345_0.png +ETHZ/eth03/images/image_00000346_0.png +ETHZ/eth03/images/image_00000347_0.png +ETHZ/eth03/images/image_00000348_0.png +ETHZ/eth03/images/image_00000349_0.png +ETHZ/eth03/images/image_00000350_0.png +ETHZ/eth03/images/image_00000351_0.png +ETHZ/eth03/images/image_00000352_0.png +ETHZ/eth03/images/image_00000353_0.png +ETHZ/eth03/images/image_00000354_0.png +ETHZ/eth03/images/image_00000355_0.png +ETHZ/eth03/images/image_00000356_0.png +ETHZ/eth03/images/image_00000357_0.png +ETHZ/eth03/images/image_00000358_0.png +ETHZ/eth03/images/image_00000359_0.png +ETHZ/eth03/images/image_00000360_0.png +ETHZ/eth03/images/image_00000361_0.png +ETHZ/eth03/images/image_00000362_0.png +ETHZ/eth03/images/image_00000363_0.png +ETHZ/eth03/images/image_00000364_0.png +ETHZ/eth03/images/image_00000365_0.png +ETHZ/eth03/images/image_00000366_0.png +ETHZ/eth03/images/image_00000367_0.png +ETHZ/eth03/images/image_00000368_0.png +ETHZ/eth03/images/image_00000369_0.png +ETHZ/eth03/images/image_00000370_0.png +ETHZ/eth03/images/image_00000371_0.png +ETHZ/eth03/images/image_00000372_0.png +ETHZ/eth03/images/image_00000373_0.png +ETHZ/eth03/images/image_00000374_0.png +ETHZ/eth03/images/image_00000375_0.png +ETHZ/eth03/images/image_00000376_0.png +ETHZ/eth03/images/image_00000377_0.png +ETHZ/eth03/images/image_00000378_0.png +ETHZ/eth03/images/image_00000379_0.png +ETHZ/eth03/images/image_00000380_0.png +ETHZ/eth03/images/image_00000381_0.png +ETHZ/eth03/images/image_00000382_0.png +ETHZ/eth03/images/image_00000383_0.png +ETHZ/eth03/images/image_00000384_0.png +ETHZ/eth03/images/image_00000385_0.png +ETHZ/eth03/images/image_00000386_0.png +ETHZ/eth03/images/image_00000387_0.png +ETHZ/eth03/images/image_00000388_0.png +ETHZ/eth03/images/image_00000389_0.png +ETHZ/eth03/images/image_00000390_0.png +ETHZ/eth03/images/image_00000391_0.png +ETHZ/eth03/images/image_00000392_0.png +ETHZ/eth03/images/image_00000393_0.png +ETHZ/eth03/images/image_00000394_0.png +ETHZ/eth03/images/image_00000395_0.png +ETHZ/eth03/images/image_00000396_0.png +ETHZ/eth03/images/image_00000397_0.png +ETHZ/eth03/images/image_00000398_0.png +ETHZ/eth03/images/image_00000399_0.png +ETHZ/eth03/images/image_00000400_0.png +ETHZ/eth03/images/image_00000401_0.png +ETHZ/eth03/images/image_00000402_0.png +ETHZ/eth03/images/image_00000403_0.png +ETHZ/eth03/images/image_00000404_0.png +ETHZ/eth03/images/image_00000405_0.png +ETHZ/eth03/images/image_00000406_0.png +ETHZ/eth03/images/image_00000407_0.png +ETHZ/eth03/images/image_00000408_0.png +ETHZ/eth03/images/image_00000409_0.png +ETHZ/eth03/images/image_00000410_0.png +ETHZ/eth03/images/image_00000411_0.png +ETHZ/eth03/images/image_00000412_0.png +ETHZ/eth03/images/image_00000413_0.png +ETHZ/eth03/images/image_00000414_0.png +ETHZ/eth03/images/image_00000415_0.png +ETHZ/eth03/images/image_00000416_0.png +ETHZ/eth03/images/image_00000417_0.png +ETHZ/eth03/images/image_00000418_0.png +ETHZ/eth03/images/image_00000419_0.png +ETHZ/eth03/images/image_00000420_0.png +ETHZ/eth03/images/image_00000421_0.png +ETHZ/eth03/images/image_00000422_0.png +ETHZ/eth03/images/image_00000423_0.png +ETHZ/eth03/images/image_00000424_0.png +ETHZ/eth03/images/image_00000425_0.png +ETHZ/eth03/images/image_00000426_0.png +ETHZ/eth03/images/image_00000427_0.png 
+ETHZ/eth03/images/image_00000428_0.png +ETHZ/eth03/images/image_00000429_0.png +ETHZ/eth03/images/image_00000430_0.png +ETHZ/eth03/images/image_00000431_0.png +ETHZ/eth03/images/image_00000432_0.png +ETHZ/eth03/images/image_00000433_0.png +ETHZ/eth03/images/image_00000434_0.png +ETHZ/eth03/images/image_00000435_0.png +ETHZ/eth03/images/image_00000436_0.png +ETHZ/eth03/images/image_00000437_0.png +ETHZ/eth03/images/image_00000438_0.png +ETHZ/eth03/images/image_00000439_0.png +ETHZ/eth03/images/image_00000440_0.png +ETHZ/eth03/images/image_00000441_0.png +ETHZ/eth03/images/image_00000442_0.png +ETHZ/eth03/images/image_00000443_0.png +ETHZ/eth03/images/image_00000444_0.png +ETHZ/eth03/images/image_00000445_0.png +ETHZ/eth03/images/image_00000446_0.png +ETHZ/eth03/images/image_00000447_0.png +ETHZ/eth03/images/image_00000448_0.png +ETHZ/eth03/images/image_00000449_0.png +ETHZ/eth03/images/image_00000450_0.png +ETHZ/eth03/images/image_00000451_0.png +ETHZ/eth03/images/image_00000452_0.png +ETHZ/eth03/images/image_00000453_0.png +ETHZ/eth05/images/image_00000000_0.png +ETHZ/eth05/images/image_00000004_0.png +ETHZ/eth05/images/image_00000008_0.png +ETHZ/eth05/images/image_00000012_0.png +ETHZ/eth05/images/image_00000016_0.png +ETHZ/eth05/images/image_00000022_0.png +ETHZ/eth05/images/image_00000026_0.png +ETHZ/eth05/images/image_00000030_0.png +ETHZ/eth05/images/image_00000034_0.png +ETHZ/eth05/images/image_00000038_0.png +ETHZ/eth05/images/image_00000042_0.png +ETHZ/eth05/images/image_00000046_0.png +ETHZ/eth05/images/image_00000050_0.png +ETHZ/eth05/images/image_00000054_0.png +ETHZ/eth05/images/image_00000058_0.png +ETHZ/eth05/images/image_00000062_0.png +ETHZ/eth05/images/image_00000066_0.png +ETHZ/eth05/images/image_00000070_0.png +ETHZ/eth05/images/image_00000074_0.png +ETHZ/eth05/images/image_00000078_0.png +ETHZ/eth05/images/image_00000082_0.png +ETHZ/eth05/images/image_00000086_0.png +ETHZ/eth05/images/image_00000090_0.png +ETHZ/eth05/images/image_00000094_0.png +ETHZ/eth05/images/image_00000098_0.png +ETHZ/eth05/images/image_00000101_0.png +ETHZ/eth05/images/image_00000105_0.png +ETHZ/eth05/images/image_00000109_0.png +ETHZ/eth05/images/image_00000113_0.png +ETHZ/eth05/images/image_00000115_0.png +ETHZ/eth05/images/image_00000119_0.png +ETHZ/eth05/images/image_00000123_0.png +ETHZ/eth05/images/image_00000127_0.png +ETHZ/eth05/images/image_00000130_0.png +ETHZ/eth05/images/image_00000134_0.png +ETHZ/eth05/images/image_00000138_0.png +ETHZ/eth05/images/image_00000142_0.png +ETHZ/eth05/images/image_00000145_0.png +ETHZ/eth05/images/image_00000149_0.png +ETHZ/eth05/images/image_00000153_0.png +ETHZ/eth05/images/image_00000157_0.png +ETHZ/eth05/images/image_00000161_0.png +ETHZ/eth05/images/image_00000165_0.png +ETHZ/eth05/images/image_00000169_0.png +ETHZ/eth05/images/image_00000173_0.png +ETHZ/eth05/images/image_00000177_0.png +ETHZ/eth05/images/image_00000181_0.png +ETHZ/eth05/images/image_00000185_0.png +ETHZ/eth05/images/image_00000189_0.png +ETHZ/eth05/images/image_00000193_0.png +ETHZ/eth05/images/image_00000197_0.png +ETHZ/eth05/images/image_00000201_0.png +ETHZ/eth05/images/image_00000205_0.png +ETHZ/eth05/images/image_00000209_0.png +ETHZ/eth05/images/image_00000213_0.png +ETHZ/eth05/images/image_00000217_0.png +ETHZ/eth07/images/image_00004900_0.png +ETHZ/eth07/images/image_00004904_0.png +ETHZ/eth07/images/image_00004908_0.png +ETHZ/eth07/images/image_00004912_0.png +ETHZ/eth07/images/image_00004916_0.png +ETHZ/eth07/images/image_00004920_0.png 
+ETHZ/eth07/images/image_00004924_0.png +ETHZ/eth07/images/image_00004928_0.png +ETHZ/eth07/images/image_00004932_0.png +ETHZ/eth07/images/image_00004936_0.png +ETHZ/eth07/images/image_00004940_0.png +ETHZ/eth07/images/image_00004944_0.png +ETHZ/eth07/images/image_00004948_0.png +ETHZ/eth07/images/image_00004952_0.png +ETHZ/eth07/images/image_00004956_0.png +ETHZ/eth07/images/image_00004960_0.png +ETHZ/eth07/images/image_00004964_0.png +ETHZ/eth07/images/image_00004968_0.png +ETHZ/eth07/images/image_00004972_0.png +ETHZ/eth07/images/image_00004976_0.png +ETHZ/eth07/images/image_00004980_0.png +ETHZ/eth07/images/image_00004984_0.png +ETHZ/eth07/images/image_00004988_0.png +ETHZ/eth07/images/image_00004992_0.png +ETHZ/eth07/images/image_00004996_0.png +ETHZ/eth07/images/image_00005000_0.png +ETHZ/eth07/images/image_00005004_0.png +ETHZ/eth07/images/image_00005008_0.png +ETHZ/eth07/images/image_00005012_0.png +ETHZ/eth07/images/image_00005016_0.png +ETHZ/eth07/images/image_00005020_0.png +ETHZ/eth07/images/image_00005024_0.png +ETHZ/eth07/images/image_00005028_0.png +ETHZ/eth07/images/image_00005032_0.png +ETHZ/eth07/images/image_00005036_0.png +ETHZ/eth07/images/image_00005040_0.png +ETHZ/eth07/images/image_00005044_0.png +ETHZ/eth07/images/image_00005048_0.png +ETHZ/eth07/images/image_00005052_0.png +ETHZ/eth07/images/image_00005056_0.png +ETHZ/eth07/images/image_00005060_0.png +ETHZ/eth07/images/image_00005064_0.png +ETHZ/eth07/images/image_00005068_0.png +ETHZ/eth07/images/image_00005072_0.png +ETHZ/eth07/images/image_00005076_0.png +ETHZ/eth07/images/image_00005080_0.png +ETHZ/eth07/images/image_00005084_0.png +ETHZ/eth07/images/image_00005088_0.png +ETHZ/eth07/images/image_00005092_0.png +ETHZ/eth07/images/image_00005096_0.png +ETHZ/eth07/images/image_00005100_0.png +ETHZ/eth07/images/image_00005104_0.png +ETHZ/eth07/images/image_00005108_0.png +ETHZ/eth07/images/image_00005112_0.png +ETHZ/eth07/images/image_00005116_0.png +ETHZ/eth07/images/image_00005120_0.png +ETHZ/eth07/images/image_00005124_0.png +ETHZ/eth07/images/image_00005128_0.png +ETHZ/eth07/images/image_00005132_0.png +ETHZ/eth07/images/image_00005136_0.png +ETHZ/eth07/images/image_00005140_0.png +ETHZ/eth07/images/image_00005144_0.png +ETHZ/eth07/images/image_00005148_0.png +ETHZ/eth07/images/image_00005152_0.png +ETHZ/eth07/images/image_00005156_0.png +ETHZ/eth07/images/image_00005160_0.png +ETHZ/eth07/images/image_00005164_0.png +ETHZ/eth07/images/image_00005168_0.png +ETHZ/eth07/images/image_00005172_0.png +ETHZ/eth07/images/image_00005176_0.png +ETHZ/eth07/images/image_00005180_0.png +ETHZ/eth07/images/image_00005184_0.png +ETHZ/eth07/images/image_00005188_0.png +ETHZ/eth07/images/image_00005192_0.png +ETHZ/eth07/images/image_00005196_0.png +ETHZ/eth07/images/image_00005200_0.png +ETHZ/eth07/images/image_00005204_0.png +ETHZ/eth07/images/image_00005208_0.png +ETHZ/eth07/images/image_00005212_0.png +ETHZ/eth07/images/image_00005216_0.png +ETHZ/eth07/images/image_00005220_0.png +ETHZ/eth07/images/image_00005224_0.png +ETHZ/eth07/images/image_00005228_0.png +ETHZ/eth07/images/image_00005232_0.png +ETHZ/eth07/images/image_00005236_0.png +ETHZ/eth07/images/image_00005240_0.png +ETHZ/eth07/images/image_00005244_0.png +ETHZ/eth07/images/image_00005248_0.png +ETHZ/eth07/images/image_00005252_0.png +ETHZ/eth07/images/image_00005256_0.png +ETHZ/eth07/images/image_00005260_0.png +ETHZ/eth07/images/image_00005264_0.png +ETHZ/eth07/images/image_00005268_0.png +ETHZ/eth07/images/image_00005272_0.png 
+ETHZ/eth07/images/image_00005276_0.png +ETHZ/eth07/images/image_00005280_0.png +ETHZ/eth07/images/image_00005284_0.png +ETHZ/eth07/images/image_00005288_0.png +ETHZ/eth07/images/image_00005292_0.png +ETHZ/eth07/images/image_00005296_0.png +ETHZ/eth07/images/image_00005300_0.png +ETHZ/eth07/images/image_00005304_0.png +ETHZ/eth07/images/image_00005308_0.png +ETHZ/eth07/images/image_00005312_0.png +ETHZ/eth07/images/image_00005316_0.png +ETHZ/eth07/images/image_00005320_0.png +ETHZ/eth07/images/image_00005324_0.png +ETHZ/eth07/images/image_00005328_0.png +ETHZ/eth07/images/image_00005332_0.png +ETHZ/eth07/images/image_00005336_0.png +ETHZ/eth07/images/image_00005340_0.png +ETHZ/eth07/images/image_00005344_0.png +ETHZ/eth07/images/image_00005348_0.png +ETHZ/eth07/images/image_00005352_0.png +ETHZ/eth07/images/image_00005356_0.png +ETHZ/eth07/images/image_00005360_0.png +ETHZ/eth07/images/image_00005364_0.png +ETHZ/eth07/images/image_00005368_0.png +ETHZ/eth07/images/image_00005372_0.png +ETHZ/eth07/images/image_00005376_0.png +ETHZ/eth07/images/image_00005380_0.png +ETHZ/eth07/images/image_00005384_0.png +ETHZ/eth07/images/image_00005388_0.png +ETHZ/eth07/images/image_00005392_0.png +ETHZ/eth07/images/image_00005396_0.png +ETHZ/eth07/images/image_00005400_0.png +ETHZ/eth07/images/image_00005404_0.png +ETHZ/eth07/images/image_00005408_0.png +ETHZ/eth07/images/image_00005412_0.png +ETHZ/eth07/images/image_00005416_0.png +ETHZ/eth07/images/image_00005420_0.png +ETHZ/eth07/images/image_00005424_0.png +ETHZ/eth07/images/image_00005428_0.png +ETHZ/eth07/images/image_00005432_0.png +ETHZ/eth07/images/image_00005436_0.png +ETHZ/eth07/images/image_00005440_0.png +ETHZ/eth07/images/image_00005444_0.png +ETHZ/eth07/images/image_00005448_0.png +ETHZ/eth07/images/image_00005452_0.png +ETHZ/eth07/images/image_00005456_0.png +ETHZ/eth07/images/image_00005460_0.png +ETHZ/eth07/images/image_00005464_0.png +ETHZ/eth07/images/image_00005468_0.png +ETHZ/eth07/images/image_00005472_0.png +ETHZ/eth07/images/image_00005476_0.png +ETHZ/eth07/images/image_00005480_0.png +ETHZ/eth07/images/image_00005484_0.png +ETHZ/eth07/images/image_00005488_0.png +ETHZ/eth07/images/image_00005492_0.png +ETHZ/eth07/images/image_00005496_0.png +ETHZ/eth07/images/image_00005500_0.png +ETHZ/eth07/images/image_00005504_0.png +ETHZ/eth07/images/image_00005508_0.png +ETHZ/eth07/images/image_00005512_0.png +ETHZ/eth07/images/image_00005516_0.png +ETHZ/eth07/images/image_00005520_0.png +ETHZ/eth07/images/image_00005524_0.png +ETHZ/eth07/images/image_00005528_0.png +ETHZ/eth07/images/image_00005532_0.png +ETHZ/eth07/images/image_00005536_0.png +ETHZ/eth07/images/image_00005540_0.png +ETHZ/eth07/images/image_00005544_0.png +ETHZ/eth07/images/image_00005548_0.png +ETHZ/eth07/images/image_00005552_0.png +ETHZ/eth07/images/image_00005556_0.png +ETHZ/eth07/images/image_00005560_0.png +ETHZ/eth07/images/image_00005564_0.png +ETHZ/eth07/images/image_00005568_0.png +ETHZ/eth07/images/image_00005572_0.png +ETHZ/eth07/images/image_00005576_0.png +ETHZ/eth07/images/image_00005580_0.png +ETHZ/eth07/images/image_00005584_0.png +ETHZ/eth07/images/image_00005588_0.png +ETHZ/eth07/images/image_00005592_0.png +ETHZ/eth07/images/image_00005596_0.png +ETHZ/eth07/images/image_00005600_0.png +ETHZ/eth07/images/image_00005604_0.png +ETHZ/eth07/images/image_00005608_0.png +ETHZ/eth07/images/image_00005612_0.png +ETHZ/eth07/images/image_00005616_0.png +ETHZ/eth07/images/image_00005620_0.png +ETHZ/eth07/images/image_00005624_0.png 
+ETHZ/eth07/images/image_00005628_0.png +ETHZ/eth07/images/image_00005632_0.png +ETHZ/eth07/images/image_00005636_0.png +ETHZ/eth07/images/image_00005640_0.png +ETHZ/eth07/images/image_00005644_0.png +ETHZ/eth07/images/image_00005648_0.png +ETHZ/eth07/images/image_00005652_0.png +ETHZ/eth07/images/image_00005656_0.png +ETHZ/eth07/images/image_00005660_0.png +ETHZ/eth07/images/image_00005664_0.png +ETHZ/eth07/images/image_00005668_0.png +ETHZ/eth07/images/image_00005672_0.png +ETHZ/eth07/images/image_00005676_0.png +ETHZ/eth07/images/image_00005680_0.png +ETHZ/eth07/images/image_00005684_0.png +ETHZ/eth07/images/image_00005688_0.png +ETHZ/eth07/images/image_00005692_0.png +ETHZ/eth07/images/image_00005696_0.png +ETHZ/eth07/images/image_00005700_0.png diff --git a/deploy/ONNXRuntime/README.md b/deploy/ONNXRuntime/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4d0669081db3549f6db4a14189e73640de0688e2 --- /dev/null +++ b/deploy/ONNXRuntime/README.md @@ -0,0 +1,19 @@ +## ByteTrack-ONNXRuntime in Python + +This doc introduces how to convert your pytorch model into onnx, and how to run an onnxruntime demo to verify your convertion. + +### Convert Your Model to ONNX + +```shell +cd +python3 tools/export_onnx.py --output-name bytetrack_s.onnx -f exps/example/mot/yolox_s_mix_det.py -c pretrained/bytetrack_s_mot17.pth.tar +``` + +### ONNXRuntime Demo + +You can run onnx demo with **16 FPS** (96-core Intel(R) Xeon(R) Platinum 8163 CPU @ 2.50GHz): + +```shell +cd /deploy/ONNXRuntime +python3 onnx_inference.py +``` diff --git a/deploy/ONNXRuntime/onnx_inference.py b/deploy/ONNXRuntime/onnx_inference.py new file mode 100644 index 0000000000000000000000000000000000000000..5929174735c99a2952a5f152f0cba2a0d84770bd --- /dev/null +++ b/deploy/ONNXRuntime/onnx_inference.py @@ -0,0 +1,160 @@ +import argparse +import os + +import cv2 +import numpy as np +from loguru import logger + +import onnxruntime + +from yolox.data.data_augment import preproc as preprocess +from yolox.utils import mkdir, multiclass_nms, demo_postprocess, vis +from yolox.utils.visualize import plot_tracking +from yolox.tracker.byte_tracker import BYTETracker +from yolox.tracking_utils.timer import Timer + + +def make_parser(): + parser = argparse.ArgumentParser("onnxruntime inference sample") + parser.add_argument( + "-m", + "--model", + type=str, + default="../../bytetrack_s.onnx", + help="Input your onnx model.", + ) + parser.add_argument( + "-i", + "--video_path", + type=str, + default='../../videos/palace.mp4', + help="Path to your input image.", + ) + parser.add_argument( + "-o", + "--output_dir", + type=str, + default='demo_output', + help="Path to your output directory.", + ) + parser.add_argument( + "-s", + "--score_thr", + type=float, + default=0.1, + help="Score threshould to filter the result.", + ) + parser.add_argument( + "-n", + "--nms_thr", + type=float, + default=0.7, + help="NMS threshould.", + ) + parser.add_argument( + "--input_shape", + type=str, + default="608,1088", + help="Specify an input shape for inference.", + ) + parser.add_argument( + "--with_p6", + action="store_true", + help="Whether your model uses p6 in FPN/PAN.", + ) + # tracking args + parser.add_argument("--track_thresh", type=float, default=0.5, help="tracking confidence threshold") + parser.add_argument("--track_buffer", type=int, default=30, help="the frames for keep lost tracks") + parser.add_argument("--match_thresh", type=int, default=0.8, help="matching threshold for tracking") + parser.add_argument('--min-box-area', 
type=float, default=10, help='filter out tiny boxes') + parser.add_argument("--mot20", dest="mot20", default=False, action="store_true", help="test mot20.") + return parser + + +class Predictor(object): + def __init__(self, args): + self.rgb_means = (0.485, 0.456, 0.406) + self.std = (0.229, 0.224, 0.225) + self.args = args + self.session = onnxruntime.InferenceSession(args.model) + self.input_shape = tuple(map(int, args.input_shape.split(','))) + + def inference(self, ori_img, timer): + img_info = {"id": 0} + height, width = ori_img.shape[:2] + img_info["height"] = height + img_info["width"] = width + img_info["raw_img"] = ori_img + + img, ratio = preprocess(ori_img, self.input_shape, self.rgb_means, self.std) + img_info["ratio"] = ratio + ort_inputs = {self.session.get_inputs()[0].name: img[None, :, :, :]} + timer.tic() + output = self.session.run(None, ort_inputs) + predictions = demo_postprocess(output[0], self.input_shape, p6=self.args.with_p6)[0] + + boxes = predictions[:, :4] + scores = predictions[:, 4:5] * predictions[:, 5:] + + boxes_xyxy = np.ones_like(boxes) + boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2]/2. + boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3]/2. + boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2]/2. + boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3]/2. + boxes_xyxy /= ratio + dets = multiclass_nms(boxes_xyxy, scores, nms_thr=self.args.nms_thr, score_thr=self.args.score_thr) + return dets[:, :-1], img_info + + +def imageflow_demo(predictor, args): + cap = cv2.VideoCapture(args.video_path) + width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) # float + height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) # float + fps = cap.get(cv2.CAP_PROP_FPS) + save_folder = args.output_dir + os.makedirs(save_folder, exist_ok=True) + save_path = os.path.join(save_folder, args.video_path.split("/")[-1]) + logger.info(f"video save_path is {save_path}") + vid_writer = cv2.VideoWriter( + save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height)) + ) + tracker = BYTETracker(args, frame_rate=30) + timer = Timer() + frame_id = 0 + results = [] + while True: + if frame_id % 20 == 0: + logger.info('Processing frame {} ({:.2f} fps)'.format(frame_id, 1. / max(1e-5, timer.average_time))) + ret_val, frame = cap.read() + if ret_val: + outputs, img_info = predictor.inference(frame, timer) + online_targets = tracker.update(outputs, [img_info['height'], img_info['width']], [img_info['height'], img_info['width']]) + online_tlwhs = [] + online_ids = [] + online_scores = [] + for t in online_targets: + tlwh = t.tlwh + tid = t.track_id + vertical = tlwh[2] / tlwh[3] > 1.6 + if tlwh[2] * tlwh[3] > args.min_box_area and not vertical: + online_tlwhs.append(tlwh) + online_ids.append(tid) + online_scores.append(t.score) + timer.toc() + results.append((frame_id + 1, online_tlwhs, online_ids, online_scores)) + online_im = plot_tracking(img_info['raw_img'], online_tlwhs, online_ids, frame_id=frame_id + 1, + fps=1. 
/ timer.average_time) + vid_writer.write(online_im) + ch = cv2.waitKey(1) + if ch == 27 or ch == ord("q") or ch == ord("Q"): + break + else: + break + frame_id += 1 + + +if __name__ == '__main__': + args = make_parser().parse_args() + + predictor = Predictor(args) + imageflow_demo(predictor, args) \ No newline at end of file diff --git a/deploy/TensorRT/cpp/CMakeLists.txt b/deploy/TensorRT/cpp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..dabf6220c95557d2d728419f18844e938483d79a --- /dev/null +++ b/deploy/TensorRT/cpp/CMakeLists.txt @@ -0,0 +1,39 @@ +cmake_minimum_required(VERSION 2.6) + +project(bytetrack) + +add_definitions(-std=c++11) + +option(CUDA_USE_STATIC_CUDA_RUNTIME OFF) +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_BUILD_TYPE Debug) + +find_package(CUDA REQUIRED) + +include_directories(${PROJECT_SOURCE_DIR}/include) +include_directories(/usr/local/include/eigen3) +link_directories(${PROJECT_SOURCE_DIR}/include) +# include and link dirs of cuda and tensorrt, you need adapt them if yours are different +# cuda +include_directories(/usr/local/cuda/include) +link_directories(/usr/local/cuda/lib64) +# cudnn +include_directories(/data/cuda/cuda-10.2/cudnn/v8.0.4/include) +link_directories(/data/cuda/cuda-10.2/cudnn/v8.0.4/lib64) +# tensorrt +include_directories(/opt/tiger/demo/TensorRT-7.2.3.4/include) +link_directories(/opt/tiger/demo/TensorRT-7.2.3.4/lib) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Ofast -Wfatal-errors -D_MWAITXINTRIN_H_INCLUDED") + +find_package(OpenCV) +include_directories(${OpenCV_INCLUDE_DIRS}) + +file(GLOB My_Source_Files ${PROJECT_SOURCE_DIR}/src/*.cpp) +add_executable(bytetrack ${My_Source_Files}) +target_link_libraries(bytetrack nvinfer) +target_link_libraries(bytetrack cudart) +target_link_libraries(bytetrack ${OpenCV_LIBS}) + +add_definitions(-O2 -pthread) + diff --git a/deploy/TensorRT/cpp/README.md b/deploy/TensorRT/cpp/README.md new file mode 100644 index 0000000000000000000000000000000000000000..cb2bfae1054140d2863b10738e7ae5b91c52b28a --- /dev/null +++ b/deploy/TensorRT/cpp/README.md @@ -0,0 +1,58 @@ +# ByteTrack-TensorRT in C++ + +## Installation + +Install opencv with ```sudo apt-get install libopencv-dev``` (we don't need a higher version of opencv like v3.3+). + +Install eigen-3.3.9 [[google]](https://drive.google.com/file/d/1rqO74CYCNrmRAg8Rra0JP3yZtJ-rfket/view?usp=sharing), [[baidu(code:ueq4)]](https://pan.baidu.com/s/15kEfCxpy-T7tz60msxxExg). + +```shell +unzip eigen-3.3.9.zip +cd eigen-3.3.9 +mkdir build +cd build +cmake .. +sudo make install +``` + +## Prepare serialized engine file + +Follow the TensorRT Python demo to convert and save the serialized engine file. + +Check the 'model_trt.engine' file, which will be automatically saved at the YOLOX_output dir. + +## Build the demo + +You should set the TensorRT path and CUDA path in CMakeLists.txt. + +For bytetrack_s model, we set the input frame size 1088 x 608. For bytetrack_m, bytetrack_l, bytetrack_x models, we set the input frame size 1440 x 800. You can modify the INPUT_W and INPUT_H in src/bytetrack.cpp + +```c++ +static const int INPUT_W = 1088; +static const int INPUT_H = 608; +``` + +You can first build the demo: + +```shell +cd /demo/TensorRT/cpp +mkdir build +cd build +cmake .. 
+make +``` + +Then you can run the demo with **200 FPS**: + +```shell +./bytetrack ../../../../YOLOX_outputs/yolox_s_mix_det/model_trt.engine -i ../../../../videos/palace.mp4 +``` + +(If you find the output video lose some frames, you can convert the input video by running: + +```shell +cd +python3 tools/convert_video.py +``` +to generate an appropriate input video for TensorRT C++ demo. ) + diff --git a/deploy/TensorRT/cpp/include/BYTETracker.h b/deploy/TensorRT/cpp/include/BYTETracker.h new file mode 100644 index 0000000000000000000000000000000000000000..e3dda973fa27ccdb85a27841ec2a1cf8dcc1e9b0 --- /dev/null +++ b/deploy/TensorRT/cpp/include/BYTETracker.h @@ -0,0 +1,49 @@ +#pragma once + +#include "STrack.h" + +struct Object +{ + cv::Rect_ rect; + int label; + float prob; +}; + +class BYTETracker +{ +public: + BYTETracker(int frame_rate = 30, int track_buffer = 30); + ~BYTETracker(); + + vector update(const vector& objects); + Scalar get_color(int idx); + +private: + vector joint_stracks(vector &tlista, vector &tlistb); + vector joint_stracks(vector &tlista, vector &tlistb); + + vector sub_stracks(vector &tlista, vector &tlistb); + void remove_duplicate_stracks(vector &resa, vector &resb, vector &stracksa, vector &stracksb); + + void linear_assignment(vector > &cost_matrix, int cost_matrix_size, int cost_matrix_size_size, float thresh, + vector > &matches, vector &unmatched_a, vector &unmatched_b); + vector > iou_distance(vector &atracks, vector &btracks, int &dist_size, int &dist_size_size); + vector > iou_distance(vector &atracks, vector &btracks); + vector > ious(vector > &atlbrs, vector > &btlbrs); + + double lapjv(const vector > &cost, vector &rowsol, vector &colsol, + bool extend_cost = false, float cost_limit = LONG_MAX, bool return_cost = true); + +private: + + float track_thresh; + float high_thresh; + float match_thresh; + int frame_id; + int max_time_lost; + + vector tracked_stracks; + vector lost_stracks; + vector removed_stracks; + byte_kalman::KalmanFilter kalman_filter; +}; \ No newline at end of file diff --git a/deploy/TensorRT/cpp/include/STrack.h b/deploy/TensorRT/cpp/include/STrack.h new file mode 100644 index 0000000000000000000000000000000000000000..752cbefa8f7f7f4f0aff08e0e28ff036afe7d61a --- /dev/null +++ b/deploy/TensorRT/cpp/include/STrack.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include "kalmanFilter.h" + +using namespace cv; +using namespace std; + +enum TrackState { New = 0, Tracked, Lost, Removed }; + +class STrack +{ +public: + STrack(vector tlwh_, float score); + ~STrack(); + + vector static tlbr_to_tlwh(vector &tlbr); + void static multi_predict(vector &stracks, byte_kalman::KalmanFilter &kalman_filter); + void static_tlwh(); + void static_tlbr(); + vector tlwh_to_xyah(vector tlwh_tmp); + vector to_xyah(); + void mark_lost(); + void mark_removed(); + int next_id(); + int end_frame(); + + void activate(byte_kalman::KalmanFilter &kalman_filter, int frame_id); + void re_activate(STrack &new_track, int frame_id, bool new_id = false); + void update(STrack &new_track, int frame_id); + +public: + bool is_activated; + int track_id; + int state; + + vector _tlwh; + vector tlwh; + vector tlbr; + int frame_id; + int tracklet_len; + int start_frame; + + KAL_MEAN mean; + KAL_COVA covariance; + float score; + +private: + byte_kalman::KalmanFilter kalman_filter; +}; \ No newline at end of file diff --git a/deploy/TensorRT/cpp/include/dataType.h b/deploy/TensorRT/cpp/include/dataType.h new file mode 100644 index 
0000000000000000000000000000000000000000..a7821a395c1c03db137587b879b255846fb0ca16 --- /dev/null +++ b/deploy/TensorRT/cpp/include/dataType.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include + +#include +#include +typedef Eigen::Matrix DETECTBOX; +typedef Eigen::Matrix DETECTBOXSS; +typedef Eigen::Matrix FEATURE; +typedef Eigen::Matrix FEATURESS; +//typedef std::vector FEATURESS; + +//Kalmanfilter +//typedef Eigen::Matrix KAL_FILTER; +typedef Eigen::Matrix KAL_MEAN; +typedef Eigen::Matrix KAL_COVA; +typedef Eigen::Matrix KAL_HMEAN; +typedef Eigen::Matrix KAL_HCOVA; +using KAL_DATA = std::pair; +using KAL_HDATA = std::pair; + +//main +using RESULT_DATA = std::pair; + +//tracker: +using TRACKER_DATA = std::pair; +using MATCH_DATA = std::pair; +typedef struct t { + std::vector matches; + std::vector unmatched_tracks; + std::vector unmatched_detections; +}TRACHER_MATCHD; + +//linear_assignment: +typedef Eigen::Matrix DYNAMICM; \ No newline at end of file diff --git a/deploy/TensorRT/cpp/include/kalmanFilter.h b/deploy/TensorRT/cpp/include/kalmanFilter.h new file mode 100644 index 0000000000000000000000000000000000000000..6596b54e33de75d1b49a8af9bfbb1f26d00ea786 --- /dev/null +++ b/deploy/TensorRT/cpp/include/kalmanFilter.h @@ -0,0 +1,31 @@ +#pragma once + +#include "dataType.h" + +namespace byte_kalman +{ + class KalmanFilter + { + public: + static const double chi2inv95[10]; + KalmanFilter(); + KAL_DATA initiate(const DETECTBOX& measurement); + void predict(KAL_MEAN& mean, KAL_COVA& covariance); + KAL_HDATA project(const KAL_MEAN& mean, const KAL_COVA& covariance); + KAL_DATA update(const KAL_MEAN& mean, + const KAL_COVA& covariance, + const DETECTBOX& measurement); + + Eigen::Matrix gating_distance( + const KAL_MEAN& mean, + const KAL_COVA& covariance, + const std::vector& measurements, + bool only_position = false); + + private: + Eigen::Matrix _motion_mat; + Eigen::Matrix _update_mat; + float _std_weight_position; + float _std_weight_velocity; + }; +} \ No newline at end of file diff --git a/deploy/TensorRT/cpp/include/lapjv.h b/deploy/TensorRT/cpp/include/lapjv.h new file mode 100644 index 0000000000000000000000000000000000000000..0e34385a647bec225827370ff0041a391e628477 --- /dev/null +++ b/deploy/TensorRT/cpp/include/lapjv.h @@ -0,0 +1,63 @@ +#ifndef LAPJV_H +#define LAPJV_H + +#define LARGE 1000000 + +#if !defined TRUE +#define TRUE 1 +#endif +#if !defined FALSE +#define FALSE 0 +#endif + +#define NEW(x, t, n) if ((x = (t *)malloc(sizeof(t) * (n))) == 0) { return -1; } +#define FREE(x) if (x != 0) { free(x); x = 0; } +#define SWAP_INDICES(a, b) { int_t _temp_index = a; a = b; b = _temp_index; } + +#if 0 +#include +#define ASSERT(cond) assert(cond) +#define PRINTF(fmt, ...) printf(fmt, ##__VA_ARGS__) +#define PRINT_COST_ARRAY(a, n) \ + while (1) { \ + printf(#a" = ["); \ + if ((n) > 0) { \ + printf("%f", (a)[0]); \ + for (uint_t j = 1; j < n; j++) { \ + printf(", %f", (a)[j]); \ + } \ + } \ + printf("]\n"); \ + break; \ + } +#define PRINT_INDEX_ARRAY(a, n) \ + while (1) { \ + printf(#a" = ["); \ + if ((n) > 0) { \ + printf("%d", (a)[0]); \ + for (uint_t j = 1; j < n; j++) { \ + printf(", %d", (a)[j]); \ + } \ + } \ + printf("]\n"); \ + break; \ + } +#else +#define ASSERT(cond) +#define PRINTF(fmt, ...) 
+#define PRINT_COST_ARRAY(a, n) +#define PRINT_INDEX_ARRAY(a, n) +#endif + + +typedef signed int int_t; +typedef unsigned int uint_t; +typedef double cost_t; +typedef char boolean; +typedef enum fp_t { FP_1 = 1, FP_2 = 2, FP_DYNAMIC = 3 } fp_t; + +extern int_t lapjv_internal( + const uint_t n, cost_t *cost[], + int_t *x, int_t *y); + +#endif // LAPJV_H \ No newline at end of file diff --git a/deploy/TensorRT/cpp/include/logging.h b/deploy/TensorRT/cpp/include/logging.h new file mode 100644 index 0000000000000000000000000000000000000000..602b69fb5759ac8401765bad5251928c59bac7c7 --- /dev/null +++ b/deploy/TensorRT/cpp/include/logging.h @@ -0,0 +1,503 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TENSORRT_LOGGING_H +#define TENSORRT_LOGGING_H + +#include "NvInferRuntimeCommon.h" +#include +#include +#include +#include +#include +#include +#include + +using Severity = nvinfer1::ILogger::Severity; + +class LogStreamConsumerBuffer : public std::stringbuf +{ +public: + LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog) + : mOutput(stream) + , mPrefix(prefix) + , mShouldLog(shouldLog) + { + } + + LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) + : mOutput(other.mOutput) + { + } + + ~LogStreamConsumerBuffer() + { + // std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence + // std::streambuf::pptr() gives a pointer to the current position of the output sequence + // if the pointer to the beginning is not equal to the pointer to the current position, + // call putOutput() to log the output to the stream + if (pbase() != pptr()) + { + putOutput(); + } + } + + // synchronizes the stream buffer and returns 0 on success + // synchronizing the stream buffer consists of inserting the buffer contents into the stream, + // resetting the buffer and flushing the stream + virtual int sync() + { + putOutput(); + return 0; + } + + void putOutput() + { + if (mShouldLog) + { + // prepend timestamp + std::time_t timestamp = std::time(nullptr); + tm* tm_local = std::localtime(×tamp); + std::cout << "["; + std::cout << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/"; + std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/"; + std::cout << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-"; + std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":"; + std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":"; + std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] "; + // std::stringbuf::str() gets the string contents of the buffer + // insert the buffer contents pre-appended by the appropriate prefix into the stream + mOutput << mPrefix << str(); + // set the buffer to empty + str(""); + // flush the stream + mOutput.flush(); + } + } + + void setShouldLog(bool shouldLog) + { + mShouldLog = shouldLog; + } + +private: + 
std::ostream& mOutput; + std::string mPrefix; + bool mShouldLog; +}; + +//! +//! \class LogStreamConsumerBase +//! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer +//! +class LogStreamConsumerBase +{ +public: + LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog) + : mBuffer(stream, prefix, shouldLog) + { + } + +protected: + LogStreamConsumerBuffer mBuffer; +}; + +//! +//! \class LogStreamConsumer +//! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages. +//! Order of base classes is LogStreamConsumerBase and then std::ostream. +//! This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field +//! in LogStreamConsumer and then the address of the buffer is passed to std::ostream. +//! This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream. +//! Please do not change the order of the parent classes. +//! +class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream +{ +public: + //! \brief Creates a LogStreamConsumer which logs messages with level severity. + //! Reportable severity determines if the messages are severe enough to be logged. + LogStreamConsumer(Severity reportableSeverity, Severity severity) + : LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity) + , std::ostream(&mBuffer) // links the stream buffer with the stream + , mShouldLog(severity <= reportableSeverity) + , mSeverity(severity) + { + } + + LogStreamConsumer(LogStreamConsumer&& other) + : LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog) + , std::ostream(&mBuffer) // links the stream buffer with the stream + , mShouldLog(other.mShouldLog) + , mSeverity(other.mSeverity) + { + } + + void setReportableSeverity(Severity reportableSeverity) + { + mShouldLog = mSeverity <= reportableSeverity; + mBuffer.setShouldLog(mShouldLog); + } + +private: + static std::ostream& severityOstream(Severity severity) + { + return severity >= Severity::kINFO ? std::cout : std::cerr; + } + + static std::string severityPrefix(Severity severity) + { + switch (severity) + { + case Severity::kINTERNAL_ERROR: return "[F] "; + case Severity::kERROR: return "[E] "; + case Severity::kWARNING: return "[W] "; + case Severity::kINFO: return "[I] "; + case Severity::kVERBOSE: return "[V] "; + default: assert(0); return ""; + } + } + + bool mShouldLog; + Severity mSeverity; +}; + +//! \class Logger +//! +//! \brief Class which manages logging of TensorRT tools and samples +//! +//! \details This class provides a common interface for TensorRT tools and samples to log information to the console, +//! and supports logging two types of messages: +//! +//! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal) +//! - Test pass/fail messages +//! +//! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is +//! that the logic for controlling the verbosity and formatting of sample output is centralized in one location. +//! +//! In the future, this class could be extended to support dumping test results to a file in some standard format +//! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run). +//! +//! 
TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger +//! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT +//! library and messages coming from the sample. +//! +//! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the +//! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger +//! object. + +class Logger : public nvinfer1::ILogger +{ +public: + Logger(Severity severity = Severity::kWARNING) + : mReportableSeverity(severity) + { + } + + //! + //! \enum TestResult + //! \brief Represents the state of a given test + //! + enum class TestResult + { + kRUNNING, //!< The test is running + kPASSED, //!< The test passed + kFAILED, //!< The test failed + kWAIVED //!< The test was waived + }; + + //! + //! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger + //! \return The nvinfer1::ILogger associated with this Logger + //! + //! TODO Once all samples are updated to use this method to register the logger with TensorRT, + //! we can eliminate the inheritance of Logger from ILogger + //! + nvinfer1::ILogger& getTRTLogger() + { + return *this; + } + + //! + //! \brief Implementation of the nvinfer1::ILogger::log() virtual method + //! + //! Note samples should not be calling this function directly; it will eventually go away once we eliminate the + //! inheritance from nvinfer1::ILogger + //! + void log(Severity severity, const char* msg) override + { + LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl; + } + + //! + //! \brief Method for controlling the verbosity of logging output + //! + //! \param severity The logger will only emit messages that have severity of this level or higher. + //! + void setReportableSeverity(Severity severity) + { + mReportableSeverity = severity; + } + + //! + //! \brief Opaque handle that holds logging information for a particular test + //! + //! This object is an opaque handle to information used by the Logger to print test results. + //! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used + //! with Logger::reportTest{Start,End}(). + //! + class TestAtom + { + public: + TestAtom(TestAtom&&) = default; + + private: + friend class Logger; + + TestAtom(bool started, const std::string& name, const std::string& cmdline) + : mStarted(started) + , mName(name) + , mCmdline(cmdline) + { + } + + bool mStarted; + std::string mName; + std::string mCmdline; + }; + + //! + //! \brief Define a test for logging + //! + //! \param[in] name The name of the test. This should be a string starting with + //! "TensorRT" and containing dot-separated strings containing + //! the characters [A-Za-z0-9_]. + //! For example, "TensorRT.sample_googlenet" + //! \param[in] cmdline The command line used to reproduce the test + // + //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). + //! + static TestAtom defineTest(const std::string& name, const std::string& cmdline) + { + return TestAtom(false, name, cmdline); + } + + //! + //! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments + //! as input + //! + //! \param[in] name The name of the test + //! \param[in] argc The number of command-line arguments + //! 
\param[in] argv The array of command-line arguments (given as C strings) + //! + //! \return a TestAtom that can be used in Logger::reportTest{Start,End}(). + static TestAtom defineTest(const std::string& name, int argc, char const* const* argv) + { + auto cmdline = genCmdlineString(argc, argv); + return defineTest(name, cmdline); + } + + //! + //! \brief Report that a test has started. + //! + //! \pre reportTestStart() has not been called yet for the given testAtom + //! + //! \param[in] testAtom The handle to the test that has started + //! + static void reportTestStart(TestAtom& testAtom) + { + reportTestResult(testAtom, TestResult::kRUNNING); + assert(!testAtom.mStarted); + testAtom.mStarted = true; + } + + //! + //! \brief Report that a test has ended. + //! + //! \pre reportTestStart() has been called for the given testAtom + //! + //! \param[in] testAtom The handle to the test that has ended + //! \param[in] result The result of the test. Should be one of TestResult::kPASSED, + //! TestResult::kFAILED, TestResult::kWAIVED + //! + static void reportTestEnd(const TestAtom& testAtom, TestResult result) + { + assert(result != TestResult::kRUNNING); + assert(testAtom.mStarted); + reportTestResult(testAtom, result); + } + + static int reportPass(const TestAtom& testAtom) + { + reportTestEnd(testAtom, TestResult::kPASSED); + return EXIT_SUCCESS; + } + + static int reportFail(const TestAtom& testAtom) + { + reportTestEnd(testAtom, TestResult::kFAILED); + return EXIT_FAILURE; + } + + static int reportWaive(const TestAtom& testAtom) + { + reportTestEnd(testAtom, TestResult::kWAIVED); + return EXIT_SUCCESS; + } + + static int reportTest(const TestAtom& testAtom, bool pass) + { + return pass ? reportPass(testAtom) : reportFail(testAtom); + } + + Severity getReportableSeverity() const + { + return mReportableSeverity; + } + +private: + //! + //! \brief returns an appropriate string for prefixing a log message with the given severity + //! + static const char* severityPrefix(Severity severity) + { + switch (severity) + { + case Severity::kINTERNAL_ERROR: return "[F] "; + case Severity::kERROR: return "[E] "; + case Severity::kWARNING: return "[W] "; + case Severity::kINFO: return "[I] "; + case Severity::kVERBOSE: return "[V] "; + default: assert(0); return ""; + } + } + + //! + //! \brief returns an appropriate string for prefixing a test result message with the given result + //! + static const char* testResultString(TestResult result) + { + switch (result) + { + case TestResult::kRUNNING: return "RUNNING"; + case TestResult::kPASSED: return "PASSED"; + case TestResult::kFAILED: return "FAILED"; + case TestResult::kWAIVED: return "WAIVED"; + default: assert(0); return ""; + } + } + + //! + //! \brief returns an appropriate output stream (cout or cerr) to use with the given severity + //! + static std::ostream& severityOstream(Severity severity) + { + return severity >= Severity::kINFO ? std::cout : std::cerr; + } + + //! + //! \brief method that implements logging test results + //! + static void reportTestResult(const TestAtom& testAtom, TestResult result) + { + severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # " + << testAtom.mCmdline << std::endl; + } + + //! + //! \brief generate a command line string from the given (argc, argv) values + //! 
+ static std::string genCmdlineString(int argc, char const* const* argv) + { + std::stringstream ss; + for (int i = 0; i < argc; i++) + { + if (i > 0) + ss << " "; + ss << argv[i]; + } + return ss.str(); + } + + Severity mReportableSeverity; +}; + +namespace +{ + +//! +//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE +//! +//! Example usage: +//! +//! LOG_VERBOSE(logger) << "hello world" << std::endl; +//! +inline LogStreamConsumer LOG_VERBOSE(const Logger& logger) +{ + return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE); +} + +//! +//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO +//! +//! Example usage: +//! +//! LOG_INFO(logger) << "hello world" << std::endl; +//! +inline LogStreamConsumer LOG_INFO(const Logger& logger) +{ + return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO); +} + +//! +//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING +//! +//! Example usage: +//! +//! LOG_WARN(logger) << "hello world" << std::endl; +//! +inline LogStreamConsumer LOG_WARN(const Logger& logger) +{ + return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING); +} + +//! +//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR +//! +//! Example usage: +//! +//! LOG_ERROR(logger) << "hello world" << std::endl; +//! +inline LogStreamConsumer LOG_ERROR(const Logger& logger) +{ + return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR); +} + +//! +//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR +// ("fatal" severity) +//! +//! Example usage: +//! +//! LOG_FATAL(logger) << "hello world" << std::endl; +//! +inline LogStreamConsumer LOG_FATAL(const Logger& logger) +{ + return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR); +} + +} // anonymous namespace + +#endif // TENSORRT_LOGGING_H diff --git a/deploy/TensorRT/cpp/src/BYTETracker.cpp b/deploy/TensorRT/cpp/src/BYTETracker.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7c936b81f2e95f335ec90b8c355360bc0ebee800 --- /dev/null +++ b/deploy/TensorRT/cpp/src/BYTETracker.cpp @@ -0,0 +1,241 @@ +#include "BYTETracker.h" +#include + +BYTETracker::BYTETracker(int frame_rate, int track_buffer) +{ + track_thresh = 0.5; + high_thresh = 0.6; + match_thresh = 0.8; + + frame_id = 0; + max_time_lost = int(frame_rate / 30.0 * track_buffer); + cout << "Init ByteTrack!" 
<< endl; +} + +BYTETracker::~BYTETracker() +{ +} + +vector BYTETracker::update(const vector& objects) +{ + + ////////////////// Step 1: Get detections ////////////////// + this->frame_id++; + vector activated_stracks; + vector refind_stracks; + vector removed_stracks; + vector lost_stracks; + vector detections; + vector detections_low; + + vector detections_cp; + vector tracked_stracks_swap; + vector resa, resb; + vector output_stracks; + + vector unconfirmed; + vector tracked_stracks; + vector strack_pool; + vector r_tracked_stracks; + + if (objects.size() > 0) + { + for (int i = 0; i < objects.size(); i++) + { + vector tlbr_; + tlbr_.resize(4); + tlbr_[0] = objects[i].rect.x; + tlbr_[1] = objects[i].rect.y; + tlbr_[2] = objects[i].rect.x + objects[i].rect.width; + tlbr_[3] = objects[i].rect.y + objects[i].rect.height; + + float score = objects[i].prob; + + STrack strack(STrack::tlbr_to_tlwh(tlbr_), score); + if (score >= track_thresh) + { + detections.push_back(strack); + } + else + { + detections_low.push_back(strack); + } + + } + } + + // Add newly detected tracklets to tracked_stracks + for (int i = 0; i < this->tracked_stracks.size(); i++) + { + if (!this->tracked_stracks[i].is_activated) + unconfirmed.push_back(&this->tracked_stracks[i]); + else + tracked_stracks.push_back(&this->tracked_stracks[i]); + } + + ////////////////// Step 2: First association, with IoU ////////////////// + strack_pool = joint_stracks(tracked_stracks, this->lost_stracks); + STrack::multi_predict(strack_pool, this->kalman_filter); + + vector > dists; + int dist_size = 0, dist_size_size = 0; + dists = iou_distance(strack_pool, detections, dist_size, dist_size_size); + + vector > matches; + vector u_track, u_detection; + linear_assignment(dists, dist_size, dist_size_size, match_thresh, matches, u_track, u_detection); + + for (int i = 0; i < matches.size(); i++) + { + STrack *track = strack_pool[matches[i][0]]; + STrack *det = &detections[matches[i][1]]; + if (track->state == TrackState::Tracked) + { + track->update(*det, this->frame_id); + activated_stracks.push_back(*track); + } + else + { + track->re_activate(*det, this->frame_id, false); + refind_stracks.push_back(*track); + } + } + + ////////////////// Step 3: Second association, using low score dets ////////////////// + for (int i = 0; i < u_detection.size(); i++) + { + detections_cp.push_back(detections[u_detection[i]]); + } + detections.clear(); + detections.assign(detections_low.begin(), detections_low.end()); + + for (int i = 0; i < u_track.size(); i++) + { + if (strack_pool[u_track[i]]->state == TrackState::Tracked) + { + r_tracked_stracks.push_back(strack_pool[u_track[i]]); + } + } + + dists.clear(); + dists = iou_distance(r_tracked_stracks, detections, dist_size, dist_size_size); + + matches.clear(); + u_track.clear(); + u_detection.clear(); + linear_assignment(dists, dist_size, dist_size_size, 0.5, matches, u_track, u_detection); + + for (int i = 0; i < matches.size(); i++) + { + STrack *track = r_tracked_stracks[matches[i][0]]; + STrack *det = &detections[matches[i][1]]; + if (track->state == TrackState::Tracked) + { + track->update(*det, this->frame_id); + activated_stracks.push_back(*track); + } + else + { + track->re_activate(*det, this->frame_id, false); + refind_stracks.push_back(*track); + } + } + + for (int i = 0; i < u_track.size(); i++) + { + STrack *track = r_tracked_stracks[u_track[i]]; + if (track->state != TrackState::Lost) + { + track->mark_lost(); + lost_stracks.push_back(*track); + } + } + + // Deal with unconfirmed tracks, 
usually tracks with only one beginning frame + detections.clear(); + detections.assign(detections_cp.begin(), detections_cp.end()); + + dists.clear(); + dists = iou_distance(unconfirmed, detections, dist_size, dist_size_size); + + matches.clear(); + vector u_unconfirmed; + u_detection.clear(); + linear_assignment(dists, dist_size, dist_size_size, 0.7, matches, u_unconfirmed, u_detection); + + for (int i = 0; i < matches.size(); i++) + { + unconfirmed[matches[i][0]]->update(detections[matches[i][1]], this->frame_id); + activated_stracks.push_back(*unconfirmed[matches[i][0]]); + } + + for (int i = 0; i < u_unconfirmed.size(); i++) + { + STrack *track = unconfirmed[u_unconfirmed[i]]; + track->mark_removed(); + removed_stracks.push_back(*track); + } + + ////////////////// Step 4: Init new stracks ////////////////// + for (int i = 0; i < u_detection.size(); i++) + { + STrack *track = &detections[u_detection[i]]; + if (track->score < this->high_thresh) + continue; + track->activate(this->kalman_filter, this->frame_id); + activated_stracks.push_back(*track); + } + + ////////////////// Step 5: Update state ////////////////// + for (int i = 0; i < this->lost_stracks.size(); i++) + { + if (this->frame_id - this->lost_stracks[i].end_frame() > this->max_time_lost) + { + this->lost_stracks[i].mark_removed(); + removed_stracks.push_back(this->lost_stracks[i]); + } + } + + for (int i = 0; i < this->tracked_stracks.size(); i++) + { + if (this->tracked_stracks[i].state == TrackState::Tracked) + { + tracked_stracks_swap.push_back(this->tracked_stracks[i]); + } + } + this->tracked_stracks.clear(); + this->tracked_stracks.assign(tracked_stracks_swap.begin(), tracked_stracks_swap.end()); + + this->tracked_stracks = joint_stracks(this->tracked_stracks, activated_stracks); + this->tracked_stracks = joint_stracks(this->tracked_stracks, refind_stracks); + + //std::cout << activated_stracks.size() << std::endl; + + this->lost_stracks = sub_stracks(this->lost_stracks, this->tracked_stracks); + for (int i = 0; i < lost_stracks.size(); i++) + { + this->lost_stracks.push_back(lost_stracks[i]); + } + + this->lost_stracks = sub_stracks(this->lost_stracks, this->removed_stracks); + for (int i = 0; i < removed_stracks.size(); i++) + { + this->removed_stracks.push_back(removed_stracks[i]); + } + + remove_duplicate_stracks(resa, resb, this->tracked_stracks, this->lost_stracks); + + this->tracked_stracks.clear(); + this->tracked_stracks.assign(resa.begin(), resa.end()); + this->lost_stracks.clear(); + this->lost_stracks.assign(resb.begin(), resb.end()); + + for (int i = 0; i < this->tracked_stracks.size(); i++) + { + if (this->tracked_stracks[i].is_activated) + { + output_stracks.push_back(this->tracked_stracks[i]); + } + } + return output_stracks; +} \ No newline at end of file diff --git a/deploy/TensorRT/cpp/src/STrack.cpp b/deploy/TensorRT/cpp/src/STrack.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8306165304355fe6d3d6e244207211757f21a646 --- /dev/null +++ b/deploy/TensorRT/cpp/src/STrack.cpp @@ -0,0 +1,192 @@ +#include "STrack.h" + +STrack::STrack(vector tlwh_, float score) +{ + _tlwh.resize(4); + _tlwh.assign(tlwh_.begin(), tlwh_.end()); + + is_activated = false; + track_id = 0; + state = TrackState::New; + + tlwh.resize(4); + tlbr.resize(4); + + static_tlwh(); + static_tlbr(); + frame_id = 0; + tracklet_len = 0; + this->score = score; + start_frame = 0; +} + +STrack::~STrack() +{ +} + +void STrack::activate(byte_kalman::KalmanFilter &kalman_filter, int frame_id) +{ + this->kalman_filter = 
kalman_filter; + this->track_id = this->next_id(); + + vector _tlwh_tmp(4); + _tlwh_tmp[0] = this->_tlwh[0]; + _tlwh_tmp[1] = this->_tlwh[1]; + _tlwh_tmp[2] = this->_tlwh[2]; + _tlwh_tmp[3] = this->_tlwh[3]; + vector xyah = tlwh_to_xyah(_tlwh_tmp); + DETECTBOX xyah_box; + xyah_box[0] = xyah[0]; + xyah_box[1] = xyah[1]; + xyah_box[2] = xyah[2]; + xyah_box[3] = xyah[3]; + auto mc = this->kalman_filter.initiate(xyah_box); + this->mean = mc.first; + this->covariance = mc.second; + + static_tlwh(); + static_tlbr(); + + this->tracklet_len = 0; + this->state = TrackState::Tracked; + if (frame_id == 1) + { + this->is_activated = true; + } + //this->is_activated = true; + this->frame_id = frame_id; + this->start_frame = frame_id; +} + +void STrack::re_activate(STrack &new_track, int frame_id, bool new_id) +{ + vector xyah = tlwh_to_xyah(new_track.tlwh); + DETECTBOX xyah_box; + xyah_box[0] = xyah[0]; + xyah_box[1] = xyah[1]; + xyah_box[2] = xyah[2]; + xyah_box[3] = xyah[3]; + auto mc = this->kalman_filter.update(this->mean, this->covariance, xyah_box); + this->mean = mc.first; + this->covariance = mc.second; + + static_tlwh(); + static_tlbr(); + + this->tracklet_len = 0; + this->state = TrackState::Tracked; + this->is_activated = true; + this->frame_id = frame_id; + this->score = new_track.score; + if (new_id) + this->track_id = next_id(); +} + +void STrack::update(STrack &new_track, int frame_id) +{ + this->frame_id = frame_id; + this->tracklet_len++; + + vector xyah = tlwh_to_xyah(new_track.tlwh); + DETECTBOX xyah_box; + xyah_box[0] = xyah[0]; + xyah_box[1] = xyah[1]; + xyah_box[2] = xyah[2]; + xyah_box[3] = xyah[3]; + + auto mc = this->kalman_filter.update(this->mean, this->covariance, xyah_box); + this->mean = mc.first; + this->covariance = mc.second; + + static_tlwh(); + static_tlbr(); + + this->state = TrackState::Tracked; + this->is_activated = true; + + this->score = new_track.score; +} + +void STrack::static_tlwh() +{ + if (this->state == TrackState::New) + { + tlwh[0] = _tlwh[0]; + tlwh[1] = _tlwh[1]; + tlwh[2] = _tlwh[2]; + tlwh[3] = _tlwh[3]; + return; + } + + tlwh[0] = mean[0]; + tlwh[1] = mean[1]; + tlwh[2] = mean[2]; + tlwh[3] = mean[3]; + + tlwh[2] *= tlwh[3]; + tlwh[0] -= tlwh[2] / 2; + tlwh[1] -= tlwh[3] / 2; +} + +void STrack::static_tlbr() +{ + tlbr.clear(); + tlbr.assign(tlwh.begin(), tlwh.end()); + tlbr[2] += tlbr[0]; + tlbr[3] += tlbr[1]; +} + +vector STrack::tlwh_to_xyah(vector tlwh_tmp) +{ + vector tlwh_output = tlwh_tmp; + tlwh_output[0] += tlwh_output[2] / 2; + tlwh_output[1] += tlwh_output[3] / 2; + tlwh_output[2] /= tlwh_output[3]; + return tlwh_output; +} + +vector STrack::to_xyah() +{ + return tlwh_to_xyah(tlwh); +} + +vector STrack::tlbr_to_tlwh(vector &tlbr) +{ + tlbr[2] -= tlbr[0]; + tlbr[3] -= tlbr[1]; + return tlbr; +} + +void STrack::mark_lost() +{ + state = TrackState::Lost; +} + +void STrack::mark_removed() +{ + state = TrackState::Removed; +} + +int STrack::next_id() +{ + static int _count = 0; + _count++; + return _count; +} + +int STrack::end_frame() +{ + return this->frame_id; +} + +void STrack::multi_predict(vector &stracks, byte_kalman::KalmanFilter &kalman_filter) +{ + for (int i = 0; i < stracks.size(); i++) + { + if (stracks[i]->state != TrackState::Tracked) + { + stracks[i]->mean[7] = 0; + } + kalman_filter.predict(stracks[i]->mean, stracks[i]->covariance); + } +} \ No newline at end of file diff --git a/deploy/TensorRT/cpp/src/bytetrack.cpp b/deploy/TensorRT/cpp/src/bytetrack.cpp new file mode 100644 index 
0000000000000000000000000000000000000000..3f359a6a55620e3362c2421c21d00bb1add3beec --- /dev/null +++ b/deploy/TensorRT/cpp/src/bytetrack.cpp @@ -0,0 +1,506 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "NvInfer.h" +#include "cuda_runtime_api.h" +#include "logging.h" +#include "BYTETracker.h" + +#define CHECK(status) \ + do\ + {\ + auto ret = (status);\ + if (ret != 0)\ + {\ + cerr << "Cuda failure: " << ret << endl;\ + abort();\ + }\ + } while (0) + +#define DEVICE 0 // GPU id +#define NMS_THRESH 0.7 +#define BBOX_CONF_THRESH 0.1 + +using namespace nvinfer1; + +// stuff we know about the network and the input/output blobs +static const int INPUT_W = 1088; +static const int INPUT_H = 608; +const char* INPUT_BLOB_NAME = "input_0"; +const char* OUTPUT_BLOB_NAME = "output_0"; +static Logger gLogger; + +Mat static_resize(Mat& img) { + float r = min(INPUT_W / (img.cols*1.0), INPUT_H / (img.rows*1.0)); + // r = std::min(r, 1.0f); + int unpad_w = r * img.cols; + int unpad_h = r * img.rows; + Mat re(unpad_h, unpad_w, CV_8UC3); + resize(img, re, re.size()); + Mat out(INPUT_H, INPUT_W, CV_8UC3, Scalar(114, 114, 114)); + re.copyTo(out(Rect(0, 0, re.cols, re.rows))); + return out; +} + +struct GridAndStride +{ + int grid0; + int grid1; + int stride; +}; + +static void generate_grids_and_stride(const int target_w, const int target_h, vector& strides, vector& grid_strides) +{ + for (auto stride : strides) + { + int num_grid_w = target_w / stride; + int num_grid_h = target_h / stride; + for (int g1 = 0; g1 < num_grid_h; g1++) + { + for (int g0 = 0; g0 < num_grid_w; g0++) + { + grid_strides.push_back((GridAndStride){g0, g1, stride}); + } + } + } +} + +static inline float intersection_area(const Object& a, const Object& b) +{ + Rect_ inter = a.rect & b.rect; + return inter.area(); +} + +static void qsort_descent_inplace(vector& faceobjects, int left, int right) +{ + int i = left; + int j = right; + float p = faceobjects[(left + right) / 2].prob; + + while (i <= j) + { + while (faceobjects[i].prob > p) + i++; + + while (faceobjects[j].prob < p) + j--; + + if (i <= j) + { + // swap + swap(faceobjects[i], faceobjects[j]); + + i++; + j--; + } + } + + #pragma omp parallel sections + { + #pragma omp section + { + if (left < j) qsort_descent_inplace(faceobjects, left, j); + } + #pragma omp section + { + if (i < right) qsort_descent_inplace(faceobjects, i, right); + } + } +} + +static void qsort_descent_inplace(vector& objects) +{ + if (objects.empty()) + return; + + qsort_descent_inplace(objects, 0, objects.size() - 1); +} + +static void nms_sorted_bboxes(const vector& faceobjects, vector& picked, float nms_threshold) +{ + picked.clear(); + + const int n = faceobjects.size(); + + vector areas(n); + for (int i = 0; i < n; i++) + { + areas[i] = faceobjects[i].rect.area(); + } + + for (int i = 0; i < n; i++) + { + const Object& a = faceobjects[i]; + + int keep = 1; + for (int j = 0; j < (int)picked.size(); j++) + { + const Object& b = faceobjects[picked[j]]; + + // intersection over union + float inter_area = intersection_area(a, b); + float union_area = areas[i] + areas[picked[j]] - inter_area; + // float IoU = inter_area / union_area + if (inter_area / union_area > nms_threshold) + keep = 0; + } + + if (keep) + picked.push_back(i); + } +} + + +static void generate_yolox_proposals(vector grid_strides, float* feat_blob, float prob_threshold, vector& objects) +{ + const int num_class = 1; + + const int num_anchors = grid_strides.size(); + + for (int anchor_idx = 0; 
anchor_idx < num_anchors; anchor_idx++) + { + const int grid0 = grid_strides[anchor_idx].grid0; + const int grid1 = grid_strides[anchor_idx].grid1; + const int stride = grid_strides[anchor_idx].stride; + + const int basic_pos = anchor_idx * (num_class + 5); + + // yolox/models/yolo_head.py decode logic + float x_center = (feat_blob[basic_pos+0] + grid0) * stride; + float y_center = (feat_blob[basic_pos+1] + grid1) * stride; + float w = exp(feat_blob[basic_pos+2]) * stride; + float h = exp(feat_blob[basic_pos+3]) * stride; + float x0 = x_center - w * 0.5f; + float y0 = y_center - h * 0.5f; + + float box_objectness = feat_blob[basic_pos+4]; + for (int class_idx = 0; class_idx < num_class; class_idx++) + { + float box_cls_score = feat_blob[basic_pos + 5 + class_idx]; + float box_prob = box_objectness * box_cls_score; + if (box_prob > prob_threshold) + { + Object obj; + obj.rect.x = x0; + obj.rect.y = y0; + obj.rect.width = w; + obj.rect.height = h; + obj.label = class_idx; + obj.prob = box_prob; + + objects.push_back(obj); + } + + } // class loop + + } // point anchor loop +} + +float* blobFromImage(Mat& img){ + cvtColor(img, img, COLOR_BGR2RGB); + + float* blob = new float[img.total()*3]; + int channels = 3; + int img_h = img.rows; + int img_w = img.cols; + vector mean = {0.485, 0.456, 0.406}; + vector std = {0.229, 0.224, 0.225}; + for (size_t c = 0; c < channels; c++) + { + for (size_t h = 0; h < img_h; h++) + { + for (size_t w = 0; w < img_w; w++) + { + blob[c * img_w * img_h + h * img_w + w] = + (((float)img.at(h, w)[c]) / 255.0f - mean[c]) / std[c]; + } + } + } + return blob; +} + + +static void decode_outputs(float* prob, vector& objects, float scale, const int img_w, const int img_h) { + vector proposals; + vector strides = {8, 16, 32}; + vector grid_strides; + generate_grids_and_stride(INPUT_W, INPUT_H, strides, grid_strides); + generate_yolox_proposals(grid_strides, prob, BBOX_CONF_THRESH, proposals); + //std::cout << "num of boxes before nms: " << proposals.size() << std::endl; + + qsort_descent_inplace(proposals); + + vector picked; + nms_sorted_bboxes(proposals, picked, NMS_THRESH); + + + int count = picked.size(); + + //std::cout << "num of boxes: " << count << std::endl; + + objects.resize(count); + for (int i = 0; i < count; i++) + { + objects[i] = proposals[picked[i]]; + + // adjust offset to original unpadded + float x0 = (objects[i].rect.x) / scale; + float y0 = (objects[i].rect.y) / scale; + float x1 = (objects[i].rect.x + objects[i].rect.width) / scale; + float y1 = (objects[i].rect.y + objects[i].rect.height) / scale; + + // clip + // x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f); + // y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f); + // x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f); + // y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f); + + objects[i].rect.x = x0; + objects[i].rect.y = y0; + objects[i].rect.width = x1 - x0; + objects[i].rect.height = y1 - y0; + } +} + +const float color_list[80][3] = +{ + {0.000, 0.447, 0.741}, + {0.850, 0.325, 0.098}, + {0.929, 0.694, 0.125}, + {0.494, 0.184, 0.556}, + {0.466, 0.674, 0.188}, + {0.301, 0.745, 0.933}, + {0.635, 0.078, 0.184}, + {0.300, 0.300, 0.300}, + {0.600, 0.600, 0.600}, + {1.000, 0.000, 0.000}, + {1.000, 0.500, 0.000}, + {0.749, 0.749, 0.000}, + {0.000, 1.000, 0.000}, + {0.000, 0.000, 1.000}, + {0.667, 0.000, 1.000}, + {0.333, 0.333, 0.000}, + {0.333, 0.667, 0.000}, + {0.333, 1.000, 0.000}, + {0.667, 0.333, 0.000}, + {0.667, 0.667, 0.000}, + {0.667, 1.000, 0.000}, + {1.000, 0.333, 0.000}, + 
{1.000, 0.667, 0.000}, + {1.000, 1.000, 0.000}, + {0.000, 0.333, 0.500}, + {0.000, 0.667, 0.500}, + {0.000, 1.000, 0.500}, + {0.333, 0.000, 0.500}, + {0.333, 0.333, 0.500}, + {0.333, 0.667, 0.500}, + {0.333, 1.000, 0.500}, + {0.667, 0.000, 0.500}, + {0.667, 0.333, 0.500}, + {0.667, 0.667, 0.500}, + {0.667, 1.000, 0.500}, + {1.000, 0.000, 0.500}, + {1.000, 0.333, 0.500}, + {1.000, 0.667, 0.500}, + {1.000, 1.000, 0.500}, + {0.000, 0.333, 1.000}, + {0.000, 0.667, 1.000}, + {0.000, 1.000, 1.000}, + {0.333, 0.000, 1.000}, + {0.333, 0.333, 1.000}, + {0.333, 0.667, 1.000}, + {0.333, 1.000, 1.000}, + {0.667, 0.000, 1.000}, + {0.667, 0.333, 1.000}, + {0.667, 0.667, 1.000}, + {0.667, 1.000, 1.000}, + {1.000, 0.000, 1.000}, + {1.000, 0.333, 1.000}, + {1.000, 0.667, 1.000}, + {0.333, 0.000, 0.000}, + {0.500, 0.000, 0.000}, + {0.667, 0.000, 0.000}, + {0.833, 0.000, 0.000}, + {1.000, 0.000, 0.000}, + {0.000, 0.167, 0.000}, + {0.000, 0.333, 0.000}, + {0.000, 0.500, 0.000}, + {0.000, 0.667, 0.000}, + {0.000, 0.833, 0.000}, + {0.000, 1.000, 0.000}, + {0.000, 0.000, 0.167}, + {0.000, 0.000, 0.333}, + {0.000, 0.000, 0.500}, + {0.000, 0.000, 0.667}, + {0.000, 0.000, 0.833}, + {0.000, 0.000, 1.000}, + {0.000, 0.000, 0.000}, + {0.143, 0.143, 0.143}, + {0.286, 0.286, 0.286}, + {0.429, 0.429, 0.429}, + {0.571, 0.571, 0.571}, + {0.714, 0.714, 0.714}, + {0.857, 0.857, 0.857}, + {0.000, 0.447, 0.741}, + {0.314, 0.717, 0.741}, + {0.50, 0.5, 0} +}; + +void doInference(IExecutionContext& context, float* input, float* output, const int output_size, Size input_shape) { + const ICudaEngine& engine = context.getEngine(); + + // Pointers to input and output device buffers to pass to engine. + // Engine requires exactly IEngine::getNbBindings() number of buffers. + assert(engine.getNbBindings() == 2); + void* buffers[2]; + + // In order to bind the buffers, we need to know the names of the input and output tensors. 
+ // Note that indices are guaranteed to be less than IEngine::getNbBindings() + const int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME); + + assert(engine.getBindingDataType(inputIndex) == nvinfer1::DataType::kFLOAT); + const int outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME); + assert(engine.getBindingDataType(outputIndex) == nvinfer1::DataType::kFLOAT); + int mBatchSize = engine.getMaxBatchSize(); + + // Create GPU buffers on device + CHECK(cudaMalloc(&buffers[inputIndex], 3 * input_shape.height * input_shape.width * sizeof(float))); + CHECK(cudaMalloc(&buffers[outputIndex], output_size*sizeof(float))); + + // Create stream + cudaStream_t stream; + CHECK(cudaStreamCreate(&stream)); + + // DMA input batch data to device, infer on the batch asynchronously, and DMA output back to host + CHECK(cudaMemcpyAsync(buffers[inputIndex], input, 3 * input_shape.height * input_shape.width * sizeof(float), cudaMemcpyHostToDevice, stream)); + context.enqueue(1, buffers, stream, nullptr); + CHECK(cudaMemcpyAsync(output, buffers[outputIndex], output_size * sizeof(float), cudaMemcpyDeviceToHost, stream)); + cudaStreamSynchronize(stream); + + // Release stream and buffers + cudaStreamDestroy(stream); + CHECK(cudaFree(buffers[inputIndex])); + CHECK(cudaFree(buffers[outputIndex])); +} + +int main(int argc, char** argv) { + cudaSetDevice(DEVICE); + + // create a model using the API directly and serialize it to a stream + char *trtModelStream{nullptr}; + size_t size{0}; + + if (argc == 4 && string(argv[2]) == "-i") { + const string engine_file_path {argv[1]}; + ifstream file(engine_file_path, ios::binary); + if (file.good()) { + file.seekg(0, file.end); + size = file.tellg(); + file.seekg(0, file.beg); + trtModelStream = new char[size]; + assert(trtModelStream); + file.read(trtModelStream, size); + file.close(); + } + } else { + cerr << "arguments not right!" << endl; + cerr << "run 'python3 tools/trt.py -f exps/example/mot/yolox_s_mix_det.py -c pretrained/bytetrack_s_mot17.pth.tar' to serialize model first!" 
<< std::endl; + cerr << "Then use the following command:" << endl; + cerr << "cd demo/TensorRT/cpp/build" << endl; + cerr << "./bytetrack ../../../../YOLOX_outputs/yolox_s_mix_det/model_trt.engine -i ../../../../videos/palace.mp4 // deserialize file and run inference" << std::endl; + return -1; + } + const string input_video_path {argv[3]}; + + IRuntime* runtime = createInferRuntime(gLogger); + assert(runtime != nullptr); + ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size); + assert(engine != nullptr); + IExecutionContext* context = engine->createExecutionContext(); + assert(context != nullptr); + delete[] trtModelStream; + auto out_dims = engine->getBindingDimensions(1); + auto output_size = 1; + for(int j=0;j(cap.get(CV_CAP_PROP_FRAME_COUNT)); + cout << "Total frames: " << nFrame << endl; + + VideoWriter writer("demo.mp4", CV_FOURCC('m', 'p', '4', 'v'), fps, Size(img_w, img_h)); + + Mat img; + BYTETracker tracker(fps, 30); + int num_frames = 0; + int total_ms = 0; + while (true) + { + if(!cap.read(img)) + break; + num_frames ++; + if (num_frames % 20 == 0) + { + cout << "Processing frame " << num_frames << " (" << num_frames * 1000000 / total_ms << " fps)" << endl; + } + if (img.empty()) + break; + Mat pr_img = static_resize(img); + + float* blob; + blob = blobFromImage(pr_img); + float scale = min(INPUT_W / (img.cols*1.0), INPUT_H / (img.rows*1.0)); + + // run inference + auto start = chrono::system_clock::now(); + doInference(*context, blob, prob, output_size, pr_img.size()); + vector objects; + decode_outputs(prob, objects, scale, img_w, img_h); + vector output_stracks = tracker.update(objects); + auto end = chrono::system_clock::now(); + total_ms = total_ms + chrono::duration_cast(end - start).count(); + + for (int i = 0; i < output_stracks.size(); i++) + { + vector tlwh = output_stracks[i].tlwh; + bool vertical = tlwh[2] / tlwh[3] > 1.6; + if (tlwh[2] * tlwh[3] > 20 && !vertical) + { + Scalar s = tracker.get_color(output_stracks[i].track_id); + putText(img, format("%d", output_stracks[i].track_id), Point(tlwh[0], tlwh[1] - 5), + 0, 0.6, Scalar(0, 0, 255), 2, LINE_AA); + rectangle(img, Rect(tlwh[0], tlwh[1], tlwh[2], tlwh[3]), s, 2); + } + } + putText(img, format("frame: %d fps: %d num: %d", num_frames, num_frames * 1000000 / total_ms, output_stracks.size()), + Point(0, 30), 0, 0.6, Scalar(0, 0, 255), 2, LINE_AA); + writer.write(img); + + delete blob; + char c = waitKey(1); + if (c > 0) + { + break; + } + } + cap.release(); + cout << "FPS: " << num_frames * 1000000 / total_ms << endl; + // destroy the engine + context->destroy(); + engine->destroy(); + runtime->destroy(); + return 0; +} diff --git a/deploy/TensorRT/cpp/src/kalmanFilter.cpp b/deploy/TensorRT/cpp/src/kalmanFilter.cpp new file mode 100644 index 0000000000000000000000000000000000000000..168432a46810d0c1296f4b17500d41f8b4f308b4 --- /dev/null +++ b/deploy/TensorRT/cpp/src/kalmanFilter.cpp @@ -0,0 +1,152 @@ +#include "kalmanFilter.h" +#include + +namespace byte_kalman +{ + const double KalmanFilter::chi2inv95[10] = { + 0, + 3.8415, + 5.9915, + 7.8147, + 9.4877, + 11.070, + 12.592, + 14.067, + 15.507, + 16.919 + }; + KalmanFilter::KalmanFilter() + { + int ndim = 4; + double dt = 1.; + + _motion_mat = Eigen::MatrixXf::Identity(8, 8); + for (int i = 0; i < ndim; i++) { + _motion_mat(i, ndim + i) = dt; + } + _update_mat = Eigen::MatrixXf::Identity(4, 8); + + this->_std_weight_position = 1. / 20; + this->_std_weight_velocity = 1. 
/ 160; + } + + KAL_DATA KalmanFilter::initiate(const DETECTBOX &measurement) + { + DETECTBOX mean_pos = measurement; + DETECTBOX mean_vel; + for (int i = 0; i < 4; i++) mean_vel(i) = 0; + + KAL_MEAN mean; + for (int i = 0; i < 8; i++) { + if (i < 4) mean(i) = mean_pos(i); + else mean(i) = mean_vel(i - 4); + } + + KAL_MEAN std; + std(0) = 2 * _std_weight_position * measurement[3]; + std(1) = 2 * _std_weight_position * measurement[3]; + std(2) = 1e-2; + std(3) = 2 * _std_weight_position * measurement[3]; + std(4) = 10 * _std_weight_velocity * measurement[3]; + std(5) = 10 * _std_weight_velocity * measurement[3]; + std(6) = 1e-5; + std(7) = 10 * _std_weight_velocity * measurement[3]; + + KAL_MEAN tmp = std.array().square(); + KAL_COVA var = tmp.asDiagonal(); + return std::make_pair(mean, var); + } + + void KalmanFilter::predict(KAL_MEAN &mean, KAL_COVA &covariance) + { + //revise the data; + DETECTBOX std_pos; + std_pos << _std_weight_position * mean(3), + _std_weight_position * mean(3), + 1e-2, + _std_weight_position * mean(3); + DETECTBOX std_vel; + std_vel << _std_weight_velocity * mean(3), + _std_weight_velocity * mean(3), + 1e-5, + _std_weight_velocity * mean(3); + KAL_MEAN tmp; + tmp.block<1, 4>(0, 0) = std_pos; + tmp.block<1, 4>(0, 4) = std_vel; + tmp = tmp.array().square(); + KAL_COVA motion_cov = tmp.asDiagonal(); + KAL_MEAN mean1 = this->_motion_mat * mean.transpose(); + KAL_COVA covariance1 = this->_motion_mat * covariance *(_motion_mat.transpose()); + covariance1 += motion_cov; + + mean = mean1; + covariance = covariance1; + } + + KAL_HDATA KalmanFilter::project(const KAL_MEAN &mean, const KAL_COVA &covariance) + { + DETECTBOX std; + std << _std_weight_position * mean(3), _std_weight_position * mean(3), + 1e-1, _std_weight_position * mean(3); + KAL_HMEAN mean1 = _update_mat * mean.transpose(); + KAL_HCOVA covariance1 = _update_mat * covariance * (_update_mat.transpose()); + Eigen::Matrix diag = std.asDiagonal(); + diag = diag.array().square().matrix(); + covariance1 += diag; + // covariance1.diagonal() << diag; + return std::make_pair(mean1, covariance1); + } + + KAL_DATA + KalmanFilter::update( + const KAL_MEAN &mean, + const KAL_COVA &covariance, + const DETECTBOX &measurement) + { + KAL_HDATA pa = project(mean, covariance); + KAL_HMEAN projected_mean = pa.first; + KAL_HCOVA projected_cov = pa.second; + + //chol_factor, lower = + //scipy.linalg.cho_factor(projected_cov, lower=True, check_finite=False) + //kalmain_gain = + //scipy.linalg.cho_solve((cho_factor, lower), + //np.dot(covariance, self._upadte_mat.T).T, + //check_finite=False).T + Eigen::Matrix B = (covariance * (_update_mat.transpose())).transpose(); + Eigen::Matrix kalman_gain = (projected_cov.llt().solve(B)).transpose(); // eg.8x4 + Eigen::Matrix innovation = measurement - projected_mean; //eg.1x4 + auto tmp = innovation * (kalman_gain.transpose()); + KAL_MEAN new_mean = (mean.array() + tmp.array()).matrix(); + KAL_COVA new_covariance = covariance - kalman_gain * projected_cov*(kalman_gain.transpose()); + return std::make_pair(new_mean, new_covariance); + } + + Eigen::Matrix + KalmanFilter::gating_distance( + const KAL_MEAN &mean, + const KAL_COVA &covariance, + const std::vector &measurements, + bool only_position) + { + KAL_HDATA pa = this->project(mean, covariance); + if (only_position) { + printf("not implement!"); + exit(0); + } + KAL_HMEAN mean1 = pa.first; + KAL_HCOVA covariance1 = pa.second; + + // Eigen::Matrix d(size, 4); + DETECTBOXSS d(measurements.size(), 4); + int pos = 0; + for (DETECTBOX box : 
measurements) { + d.row(pos++) = box - mean1; + } + Eigen::Matrix factor = covariance1.llt().matrixL(); + Eigen::Matrix z = factor.triangularView().solve(d).transpose(); + auto zz = ((z.array())*(z.array())).matrix(); + auto square_maha = zz.colwise().sum(); + return square_maha; + } +} \ No newline at end of file diff --git a/deploy/TensorRT/cpp/src/lapjv.cpp b/deploy/TensorRT/cpp/src/lapjv.cpp new file mode 100644 index 0000000000000000000000000000000000000000..169efd51f915adf8c666f3f4978f1cb7b2d3e1b3 --- /dev/null +++ b/deploy/TensorRT/cpp/src/lapjv.cpp @@ -0,0 +1,343 @@ +#include +#include +#include + +#include "lapjv.h" + +/** Column-reduction and reduction transfer for a dense cost matrix. + */ +int_t _ccrrt_dense(const uint_t n, cost_t *cost[], + int_t *free_rows, int_t *x, int_t *y, cost_t *v) +{ + int_t n_free_rows; + boolean *unique; + + for (uint_t i = 0; i < n; i++) { + x[i] = -1; + v[i] = LARGE; + y[i] = 0; + } + for (uint_t i = 0; i < n; i++) { + for (uint_t j = 0; j < n; j++) { + const cost_t c = cost[i][j]; + if (c < v[j]) { + v[j] = c; + y[j] = i; + } + PRINTF("i=%d, j=%d, c[i,j]=%f, v[j]=%f y[j]=%d\n", i, j, c, v[j], y[j]); + } + } + PRINT_COST_ARRAY(v, n); + PRINT_INDEX_ARRAY(y, n); + NEW(unique, boolean, n); + memset(unique, TRUE, n); + { + int_t j = n; + do { + j--; + const int_t i = y[j]; + if (x[i] < 0) { + x[i] = j; + } + else { + unique[i] = FALSE; + y[j] = -1; + } + } while (j > 0); + } + n_free_rows = 0; + for (uint_t i = 0; i < n; i++) { + if (x[i] < 0) { + free_rows[n_free_rows++] = i; + } + else if (unique[i]) { + const int_t j = x[i]; + cost_t min = LARGE; + for (uint_t j2 = 0; j2 < n; j2++) { + if (j2 == (uint_t)j) { + continue; + } + const cost_t c = cost[i][j2] - v[j2]; + if (c < min) { + min = c; + } + } + PRINTF("v[%d] = %f - %f\n", j, v[j], min); + v[j] -= min; + } + } + FREE(unique); + return n_free_rows; +} + + +/** Augmenting row reduction for a dense cost matrix. + */ +int_t _carr_dense( + const uint_t n, cost_t *cost[], + const uint_t n_free_rows, + int_t *free_rows, int_t *x, int_t *y, cost_t *v) +{ + uint_t current = 0; + int_t new_free_rows = 0; + uint_t rr_cnt = 0; + PRINT_INDEX_ARRAY(x, n); + PRINT_INDEX_ARRAY(y, n); + PRINT_COST_ARRAY(v, n); + PRINT_INDEX_ARRAY(free_rows, n_free_rows); + while (current < n_free_rows) { + int_t i0; + int_t j1, j2; + cost_t v1, v2, v1_new; + boolean v1_lowers; + + rr_cnt++; + PRINTF("current = %d rr_cnt = %d\n", current, rr_cnt); + const int_t free_i = free_rows[current++]; + j1 = 0; + v1 = cost[free_i][0] - v[0]; + j2 = -1; + v2 = LARGE; + for (uint_t j = 1; j < n; j++) { + PRINTF("%d = %f %d = %f\n", j1, v1, j2, v2); + const cost_t c = cost[free_i][j] - v[j]; + if (c < v2) { + if (c >= v1) { + v2 = c; + j2 = j; + } + else { + v2 = v1; + v1 = c; + j2 = j1; + j1 = j; + } + } + } + i0 = y[j1]; + v1_new = v[j1] - (v2 - v1); + v1_lowers = v1_new < v[j1]; + PRINTF("%d %d 1=%d,%f 2=%d,%f v1'=%f(%d,%g) \n", free_i, i0, j1, v1, j2, v2, v1_new, v1_lowers, v[j1] - v1_new); + if (rr_cnt < current * n) { + if (v1_lowers) { + v[j1] = v1_new; + } + else if (i0 >= 0 && j2 >= 0) { + j1 = j2; + i0 = y[j2]; + } + if (i0 >= 0) { + if (v1_lowers) { + free_rows[--current] = i0; + } + else { + free_rows[new_free_rows++] = i0; + } + } + } + else { + PRINTF("rr_cnt=%d >= %d (current=%d * n=%d)\n", rr_cnt, current * n, current, n); + if (i0 >= 0) { + free_rows[new_free_rows++] = i0; + } + } + x[free_i] = j1; + y[j1] = free_i; + } + return new_free_rows; +} + + +/** Find columns with minimum d[j] and put them on the SCAN list. 
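+ * Columns whose d[j] equals the running minimum are swapped into cols[lo..hi-1] (the SCAN list); finding a strictly smaller d[j] restarts the list at lo, and the updated hi is returned.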
+ */ +uint_t _find_dense(const uint_t n, uint_t lo, cost_t *d, int_t *cols, int_t *y) +{ + uint_t hi = lo + 1; + cost_t mind = d[cols[lo]]; + for (uint_t k = hi; k < n; k++) { + int_t j = cols[k]; + if (d[j] <= mind) { + if (d[j] < mind) { + hi = lo; + mind = d[j]; + } + cols[k] = cols[hi]; + cols[hi++] = j; + } + } + return hi; +} + + +// Scan all columns in TODO starting from arbitrary column in SCAN +// and try to decrease d of the TODO columns using the SCAN column. +int_t _scan_dense(const uint_t n, cost_t *cost[], + uint_t *plo, uint_t*phi, + cost_t *d, int_t *cols, int_t *pred, + int_t *y, cost_t *v) +{ + uint_t lo = *plo; + uint_t hi = *phi; + cost_t h, cred_ij; + + while (lo != hi) { + int_t j = cols[lo++]; + const int_t i = y[j]; + const cost_t mind = d[j]; + h = cost[i][j] - v[j] - mind; + PRINTF("i=%d j=%d h=%f\n", i, j, h); + // For all columns in TODO + for (uint_t k = hi; k < n; k++) { + j = cols[k]; + cred_ij = cost[i][j] - v[j] - h; + if (cred_ij < d[j]) { + d[j] = cred_ij; + pred[j] = i; + if (cred_ij == mind) { + if (y[j] < 0) { + return j; + } + cols[k] = cols[hi]; + cols[hi++] = j; + } + } + } + } + *plo = lo; + *phi = hi; + return -1; +} + + +/** Single iteration of modified Dijkstra shortest path algorithm as explained in the JV paper. + * + * This is a dense matrix version. + * + * \return The closest free column index. + */ +int_t find_path_dense( + const uint_t n, cost_t *cost[], + const int_t start_i, + int_t *y, cost_t *v, + int_t *pred) +{ + uint_t lo = 0, hi = 0; + int_t final_j = -1; + uint_t n_ready = 0; + int_t *cols; + cost_t *d; + + NEW(cols, int_t, n); + NEW(d, cost_t, n); + + for (uint_t i = 0; i < n; i++) { + cols[i] = i; + pred[i] = start_i; + d[i] = cost[start_i][i] - v[i]; + } + PRINT_COST_ARRAY(d, n); + while (final_j == -1) { + // No columns left on the SCAN list. + if (lo == hi) { + PRINTF("%d..%d -> find\n", lo, hi); + n_ready = lo; + hi = _find_dense(n, lo, d, cols, y); + PRINTF("check %d..%d\n", lo, hi); + PRINT_INDEX_ARRAY(cols, n); + for (uint_t k = lo; k < hi; k++) { + const int_t j = cols[k]; + if (y[j] < 0) { + final_j = j; + } + } + } + if (final_j == -1) { + PRINTF("%d..%d -> scan\n", lo, hi); + final_j = _scan_dense( + n, cost, &lo, &hi, d, cols, pred, y, v); + PRINT_COST_ARRAY(d, n); + PRINT_INDEX_ARRAY(cols, n); + PRINT_INDEX_ARRAY(pred, n); + } + } + + PRINTF("found final_j=%d\n", final_j); + PRINT_INDEX_ARRAY(cols, n); + { + const cost_t mind = d[cols[lo]]; + for (uint_t k = 0; k < n_ready; k++) { + const int_t j = cols[k]; + v[j] += d[j] - mind; + } + } + + FREE(cols); + FREE(d); + + return final_j; +} + + +/** Augment for a dense cost matrix. + */ +int_t _ca_dense( + const uint_t n, cost_t *cost[], + const uint_t n_free_rows, + int_t *free_rows, int_t *x, int_t *y, cost_t *v) +{ + int_t *pred; + + NEW(pred, int_t, n); + + for (int_t *pfree_i = free_rows; pfree_i < free_rows + n_free_rows; pfree_i++) { + int_t i = -1, j; + uint_t k = 0; + + PRINTF("looking at free_i=%d\n", *pfree_i); + j = find_path_dense(n, cost, *pfree_i, y, v, pred); + ASSERT(j >= 0); + ASSERT(j < n); + while (i != *pfree_i) { + PRINTF("augment %d\n", j); + PRINT_INDEX_ARRAY(pred, n); + i = pred[j]; + PRINTF("y[%d]=%d -> %d\n", j, y[j], i); + y[j] = i; + PRINT_INDEX_ARRAY(x, n); + SWAP_INDICES(j, x[i]); + k++; + if (k >= n) { + ASSERT(FALSE); + } + } + } + FREE(pred); + return 0; +} + + +/** Solve dense sparse LAP. 
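+ * On success the return value is 0; for the n x n cost matrix, x[i] receives the column assigned to row i and y[j] the row assigned to column j.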
+ */ +int lapjv_internal( + const uint_t n, cost_t *cost[], + int_t *x, int_t *y) +{ + int ret; + int_t *free_rows; + cost_t *v; + + NEW(free_rows, int_t, n); + NEW(v, cost_t, n); + ret = _ccrrt_dense(n, cost, free_rows, x, y, v); + int i = 0; + while (ret > 0 && i < 2) { + ret = _carr_dense(n, cost, ret, free_rows, x, y, v); + i++; + } + if (ret > 0) { + ret = _ca_dense(n, cost, ret, free_rows, x, y, v); + } + FREE(v); + FREE(free_rows); + return ret; +} \ No newline at end of file diff --git a/deploy/TensorRT/cpp/src/utils.cpp b/deploy/TensorRT/cpp/src/utils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4aa0305cd6cf025496528ef9ff49075209fe9e8c --- /dev/null +++ b/deploy/TensorRT/cpp/src/utils.cpp @@ -0,0 +1,429 @@ +#include "BYTETracker.h" +#include "lapjv.h" + +vector BYTETracker::joint_stracks(vector &tlista, vector &tlistb) +{ + map exists; + vector res; + for (int i = 0; i < tlista.size(); i++) + { + exists.insert(pair(tlista[i]->track_id, 1)); + res.push_back(tlista[i]); + } + for (int i = 0; i < tlistb.size(); i++) + { + int tid = tlistb[i].track_id; + if (!exists[tid] || exists.count(tid) == 0) + { + exists[tid] = 1; + res.push_back(&tlistb[i]); + } + } + return res; +} + +vector BYTETracker::joint_stracks(vector &tlista, vector &tlistb) +{ + map exists; + vector res; + for (int i = 0; i < tlista.size(); i++) + { + exists.insert(pair(tlista[i].track_id, 1)); + res.push_back(tlista[i]); + } + for (int i = 0; i < tlistb.size(); i++) + { + int tid = tlistb[i].track_id; + if (!exists[tid] || exists.count(tid) == 0) + { + exists[tid] = 1; + res.push_back(tlistb[i]); + } + } + return res; +} + +vector BYTETracker::sub_stracks(vector &tlista, vector &tlistb) +{ + map stracks; + for (int i = 0; i < tlista.size(); i++) + { + stracks.insert(pair(tlista[i].track_id, tlista[i])); + } + for (int i = 0; i < tlistb.size(); i++) + { + int tid = tlistb[i].track_id; + if (stracks.count(tid) != 0) + { + stracks.erase(tid); + } + } + + vector res; + std::map::iterator it; + for (it = stracks.begin(); it != stracks.end(); ++it) + { + res.push_back(it->second); + } + + return res; +} + +void BYTETracker::remove_duplicate_stracks(vector &resa, vector &resb, vector &stracksa, vector &stracksb) +{ + vector > pdist = iou_distance(stracksa, stracksb); + vector > pairs; + for (int i = 0; i < pdist.size(); i++) + { + for (int j = 0; j < pdist[i].size(); j++) + { + if (pdist[i][j] < 0.15) + { + pairs.push_back(pair(i, j)); + } + } + } + + vector dupa, dupb; + for (int i = 0; i < pairs.size(); i++) + { + int timep = stracksa[pairs[i].first].frame_id - stracksa[pairs[i].first].start_frame; + int timeq = stracksb[pairs[i].second].frame_id - stracksb[pairs[i].second].start_frame; + if (timep > timeq) + dupb.push_back(pairs[i].second); + else + dupa.push_back(pairs[i].first); + } + + for (int i = 0; i < stracksa.size(); i++) + { + vector::iterator iter = find(dupa.begin(), dupa.end(), i); + if (iter == dupa.end()) + { + resa.push_back(stracksa[i]); + } + } + + for (int i = 0; i < stracksb.size(); i++) + { + vector::iterator iter = find(dupb.begin(), dupb.end(), i); + if (iter == dupb.end()) + { + resb.push_back(stracksb[i]); + } + } +} + +void BYTETracker::linear_assignment(vector > &cost_matrix, int cost_matrix_size, int cost_matrix_size_size, float thresh, + vector > &matches, vector &unmatched_a, vector &unmatched_b) +{ + if (cost_matrix.size() == 0) + { + for (int i = 0; i < cost_matrix_size; i++) + { + unmatched_a.push_back(i); + } + for (int i = 0; i < cost_matrix_size_size; i++) + 
{ + unmatched_b.push_back(i); + } + return; + } + + vector rowsol; vector colsol; + float c = lapjv(cost_matrix, rowsol, colsol, true, thresh); + for (int i = 0; i < rowsol.size(); i++) + { + if (rowsol[i] >= 0) + { + vector match; + match.push_back(i); + match.push_back(rowsol[i]); + matches.push_back(match); + } + else + { + unmatched_a.push_back(i); + } + } + + for (int i = 0; i < colsol.size(); i++) + { + if (colsol[i] < 0) + { + unmatched_b.push_back(i); + } + } +} + +vector > BYTETracker::ious(vector > &atlbrs, vector > &btlbrs) +{ + vector > ious; + if (atlbrs.size()*btlbrs.size() == 0) + return ious; + + ious.resize(atlbrs.size()); + for (int i = 0; i < ious.size(); i++) + { + ious[i].resize(btlbrs.size()); + } + + //bbox_ious + for (int k = 0; k < btlbrs.size(); k++) + { + vector ious_tmp; + float box_area = (btlbrs[k][2] - btlbrs[k][0] + 1)*(btlbrs[k][3] - btlbrs[k][1] + 1); + for (int n = 0; n < atlbrs.size(); n++) + { + float iw = min(atlbrs[n][2], btlbrs[k][2]) - max(atlbrs[n][0], btlbrs[k][0]) + 1; + if (iw > 0) + { + float ih = min(atlbrs[n][3], btlbrs[k][3]) - max(atlbrs[n][1], btlbrs[k][1]) + 1; + if(ih > 0) + { + float ua = (atlbrs[n][2] - atlbrs[n][0] + 1)*(atlbrs[n][3] - atlbrs[n][1] + 1) + box_area - iw * ih; + ious[n][k] = iw * ih / ua; + } + else + { + ious[n][k] = 0.0; + } + } + else + { + ious[n][k] = 0.0; + } + } + } + + return ious; +} + +vector > BYTETracker::iou_distance(vector &atracks, vector &btracks, int &dist_size, int &dist_size_size) +{ + vector > cost_matrix; + if (atracks.size() * btracks.size() == 0) + { + dist_size = atracks.size(); + dist_size_size = btracks.size(); + return cost_matrix; + } + vector > atlbrs, btlbrs; + for (int i = 0; i < atracks.size(); i++) + { + atlbrs.push_back(atracks[i]->tlbr); + } + for (int i = 0; i < btracks.size(); i++) + { + btlbrs.push_back(btracks[i].tlbr); + } + + dist_size = atracks.size(); + dist_size_size = btracks.size(); + + vector > _ious = ious(atlbrs, btlbrs); + + for (int i = 0; i < _ious.size();i++) + { + vector _iou; + for (int j = 0; j < _ious[i].size(); j++) + { + _iou.push_back(1 - _ious[i][j]); + } + cost_matrix.push_back(_iou); + } + + return cost_matrix; +} + +vector > BYTETracker::iou_distance(vector &atracks, vector &btracks) +{ + vector > atlbrs, btlbrs; + for (int i = 0; i < atracks.size(); i++) + { + atlbrs.push_back(atracks[i].tlbr); + } + for (int i = 0; i < btracks.size(); i++) + { + btlbrs.push_back(btracks[i].tlbr); + } + + vector > _ious = ious(atlbrs, btlbrs); + vector > cost_matrix; + for (int i = 0; i < _ious.size(); i++) + { + vector _iou; + for (int j = 0; j < _ious[i].size(); j++) + { + _iou.push_back(1 - _ious[i][j]); + } + cost_matrix.push_back(_iou); + } + + return cost_matrix; +} + +double BYTETracker::lapjv(const vector > &cost, vector &rowsol, vector &colsol, + bool extend_cost, float cost_limit, bool return_cost) +{ + vector > cost_c; + cost_c.assign(cost.begin(), cost.end()); + + vector > cost_c_extended; + + int n_rows = cost.size(); + int n_cols = cost[0].size(); + rowsol.resize(n_rows); + colsol.resize(n_cols); + + int n = 0; + if (n_rows == n_cols) + { + n = n_rows; + } + else + { + if (!extend_cost) + { + cout << "set extend_cost=True" << endl; + system("pause"); + exit(0); + } + } + + if (extend_cost || cost_limit < LONG_MAX) + { + n = n_rows + n_cols; + cost_c_extended.resize(n); + for (int i = 0; i < cost_c_extended.size(); i++) + cost_c_extended[i].resize(n); + + if (cost_limit < LONG_MAX) + { + for (int i = 0; i < cost_c_extended.size(); i++) + { + for (int j = 0; j 
< cost_c_extended[i].size(); j++) + { + cost_c_extended[i][j] = cost_limit / 2.0; + } + } + } + else + { + float cost_max = -1; + for (int i = 0; i < cost_c.size(); i++) + { + for (int j = 0; j < cost_c[i].size(); j++) + { + if (cost_c[i][j] > cost_max) + cost_max = cost_c[i][j]; + } + } + for (int i = 0; i < cost_c_extended.size(); i++) + { + for (int j = 0; j < cost_c_extended[i].size(); j++) + { + cost_c_extended[i][j] = cost_max + 1; + } + } + } + + for (int i = n_rows; i < cost_c_extended.size(); i++) + { + for (int j = n_cols; j < cost_c_extended[i].size(); j++) + { + cost_c_extended[i][j] = 0; + } + } + for (int i = 0; i < n_rows; i++) + { + for (int j = 0; j < n_cols; j++) + { + cost_c_extended[i][j] = cost_c[i][j]; + } + } + + cost_c.clear(); + cost_c.assign(cost_c_extended.begin(), cost_c_extended.end()); + } + + double **cost_ptr; + cost_ptr = new double *[sizeof(double *) * n]; + for (int i = 0; i < n; i++) + cost_ptr[i] = new double[sizeof(double) * n]; + + for (int i = 0; i < n; i++) + { + for (int j = 0; j < n; j++) + { + cost_ptr[i][j] = cost_c[i][j]; + } + } + + int* x_c = new int[sizeof(int) * n]; + int *y_c = new int[sizeof(int) * n]; + + int ret = lapjv_internal(n, cost_ptr, x_c, y_c); + if (ret != 0) + { + cout << "Calculate Wrong!" << endl; + system("pause"); + exit(0); + } + + double opt = 0.0; + + if (n != n_rows) + { + for (int i = 0; i < n; i++) + { + if (x_c[i] >= n_cols) + x_c[i] = -1; + if (y_c[i] >= n_rows) + y_c[i] = -1; + } + for (int i = 0; i < n_rows; i++) + { + rowsol[i] = x_c[i]; + } + for (int i = 0; i < n_cols; i++) + { + colsol[i] = y_c[i]; + } + + if (return_cost) + { + for (int i = 0; i < rowsol.size(); i++) + { + if (rowsol[i] != -1) + { + //cout << i << "\t" << rowsol[i] << "\t" << cost_ptr[i][rowsol[i]] << endl; + opt += cost_ptr[i][rowsol[i]]; + } + } + } + } + else if (return_cost) + { + for (int i = 0; i < rowsol.size(); i++) + { + opt += cost_ptr[i][rowsol[i]]; + } + } + + for (int i = 0; i < n; i++) + { + delete[]cost_ptr[i]; + } + delete[]cost_ptr; + delete[]x_c; + delete[]y_c; + + return opt; +} + +Scalar BYTETracker::get_color(int idx) +{ + idx += 3; + return Scalar(37 * idx % 255, 17 * idx % 255, 29 * idx % 255); +} \ No newline at end of file diff --git a/deploy/TensorRT/python/README.md b/deploy/TensorRT/python/README.md new file mode 100644 index 0000000000000000000000000000000000000000..235401dc0f8c16ce00bb18a545af1fa541b3895f --- /dev/null +++ b/deploy/TensorRT/python/README.md @@ -0,0 +1,22 @@ +# ByteTrack-TensorRT in Python + +## Install TensorRT Toolkit +Please follow the [TensorRT Installation Guide](https://docs.nvidia.com/deeplearning/tensorrt/install-guide/index.html) and [torch2trt gitrepo](https://github.com/NVIDIA-AI-IOT/torch2trt) to install TensorRT (Version 7 recommended) and torch2trt. 
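+
+A quick sanity check (a minimal, optional step; it assumes `python3` is the same interpreter you will use for the conversion below) is to confirm that both packages import cleanly:
+
+```shell
+python3 -c "import tensorrt, torch2trt; print(tensorrt.__version__)"
+```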
+ +## Convert model + +You can convert the Pytorch model “bytetrack_s_mot17” to TensorRT model by running: + +```shell +cd +python3 tools/trt.py -f exps/example/mot/yolox_s_mix_det.py -c pretrained/bytetrack_s_mot17.pth.tar +``` + +## Run TensorRT demo + +You can use the converted model_trt.pth to run TensorRT demo with **130 FPS**: + +```shell +cd +python3 tools/demo_track.py video -f exps/example/mot/yolox_s_mix_det.py --trt --save_result +``` diff --git a/deploy/ncnn/cpp/CMakeLists.txt b/deploy/ncnn/cpp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..e2118d807bb0ed988f76a8e333a65da7cab14c60 --- /dev/null +++ b/deploy/ncnn/cpp/CMakeLists.txt @@ -0,0 +1,84 @@ +macro(ncnn_add_example name) + add_executable(${name} ${name}.cpp) + if(OpenCV_FOUND) + target_include_directories(${name} PRIVATE ${OpenCV_INCLUDE_DIRS}) + target_link_libraries(${name} PRIVATE ncnn ${OpenCV_LIBS}) + elseif(NCNN_SIMPLEOCV) + target_compile_definitions(${name} PUBLIC USE_NCNN_SIMPLEOCV) + target_link_libraries(${name} PRIVATE ncnn) + endif() + + # add test to a virtual project group + set_property(TARGET ${name} PROPERTY FOLDER "examples") +endmacro() + +if(NCNN_PIXEL) + find_package(OpenCV QUIET COMPONENTS opencv_world) + # for opencv 2.4 on ubuntu 16.04, there is no opencv_world but OpenCV_FOUND will be TRUE + if("${OpenCV_LIBS}" STREQUAL "") + set(OpenCV_FOUND FALSE) + endif() + if(NOT OpenCV_FOUND) + find_package(OpenCV QUIET COMPONENTS core highgui imgproc imgcodecs videoio) + endif() + if(NOT OpenCV_FOUND) + find_package(OpenCV QUIET COMPONENTS core highgui imgproc) + endif() + + if(OpenCV_FOUND OR NCNN_SIMPLEOCV) + if(OpenCV_FOUND) + message(STATUS "OpenCV library: ${OpenCV_INSTALL_PATH}") + message(STATUS " version: ${OpenCV_VERSION}") + message(STATUS " libraries: ${OpenCV_LIBS}") + message(STATUS " include path: ${OpenCV_INCLUDE_DIRS}") + + if(${OpenCV_VERSION_MAJOR} GREATER 3) + set(CMAKE_CXX_STANDARD 11) + endif() + endif() + + include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../src) + include_directories(${CMAKE_CURRENT_BINARY_DIR}/../src) + include_directories(include) + include_directories(/usr/local/include/eigen3) + + ncnn_add_example(squeezenet) + ncnn_add_example(squeezenet_c_api) + ncnn_add_example(fasterrcnn) + ncnn_add_example(rfcn) + ncnn_add_example(yolov2) + ncnn_add_example(yolov3) + if(OpenCV_FOUND) + ncnn_add_example(yolov4) + endif() + ncnn_add_example(yolov5) + ncnn_add_example(yolox) + ncnn_add_example(mobilenetv2ssdlite) + ncnn_add_example(mobilenetssd) + ncnn_add_example(squeezenetssd) + ncnn_add_example(shufflenetv2) + ncnn_add_example(peleenetssd_seg) + ncnn_add_example(simplepose) + ncnn_add_example(retinaface) + ncnn_add_example(yolact) + ncnn_add_example(nanodet) + ncnn_add_example(scrfd) + ncnn_add_example(scrfd_crowdhuman) + ncnn_add_example(rvm) + file(GLOB My_Source_Files src/*.cpp) + add_executable(bytetrack ${My_Source_Files}) + if(OpenCV_FOUND) + target_include_directories(bytetrack PRIVATE ${OpenCV_INCLUDE_DIRS}) + target_link_libraries(bytetrack PRIVATE ncnn ${OpenCV_LIBS}) + elseif(NCNN_SIMPLEOCV) + target_compile_definitions(bytetrack PUBLIC USE_NCNN_SIMPLEOCV) + target_link_libraries(bytetrack PRIVATE ncnn) + endif() + # add test to a virtual project group + set_property(TARGET bytetrack PROPERTY FOLDER "examples") + else() + message(WARNING "OpenCV not found and NCNN_SIMPLEOCV disabled, examples won't be built") + endif() +else() + message(WARNING "NCNN_PIXEL not enabled, examples won't be built") +endif() diff --git 
a/deploy/ncnn/cpp/README.md b/deploy/ncnn/cpp/README.md new file mode 100644 index 0000000000000000000000000000000000000000..38137039f2056b43a77206092ac9c4cd282a2853 --- /dev/null +++ b/deploy/ncnn/cpp/README.md @@ -0,0 +1,103 @@
+# ByteTrack-CPP-ncnn
+
+## Installation
+
+Clone [ncnn](https://github.com/Tencent/ncnn) first, then follow the [build tutorial of ncnn](https://github.com/Tencent/ncnn/wiki/how-to-build) to build it on your own device.
+
+Install eigen-3.3.9 [[google]](https://drive.google.com/file/d/1rqO74CYCNrmRAg8Rra0JP3yZtJ-rfket/view?usp=sharing), [[baidu(code:ueq4)]](https://pan.baidu.com/s/15kEfCxpy-T7tz60msxxExg).
+
+```shell
+unzip eigen-3.3.9.zip
+cd eigen-3.3.9
+mkdir build
+cd build
+cmake ..
+sudo make install
+```
+
+## Generate onnx file
+Use the provided tools to generate an onnx file.
+For example, to generate the onnx file for bytetrack_s_mot17.pth, run the following command:
+```shell
+cd
+python3 tools/export_onnx.py -f exps/example/mot/yolox_s_mix_det.py -c pretrained/bytetrack_s_mot17.pth.tar
+```
+Then a bytetrack_s.onnx file is generated under .
+
+## Generate ncnn param and bin file
+Put bytetrack_s.onnx under ncnn/build/tools/onnx and then run:
+
+```shell
+cd ncnn/build/tools/onnx
+./onnx2ncnn bytetrack_s.onnx bytetrack_s.param bytetrack_s.bin
+```
+
+Since the Focus module is not supported in ncnn, warnings like:
+```shell
+Unsupported slice step !
+```
+will be printed. Don't worry: a C++ version of the Focus layer is already implemented in src/bytetrack.cpp.
+
+## Modify param file
+Open **bytetrack_s.param** and modify it as follows.
+Before (just an example):
+```
+235 268
+Input images 0 1 images
+Split splitncnn_input0 1 4 images images_splitncnn_0 images_splitncnn_1 images_splitncnn_2 images_splitncnn_3
+Crop Slice_4 1 1 images_splitncnn_3 467 -23309=1,0 -23310=1,2147483647 -23311=1,1
+Crop Slice_9 1 1 467 472 -23309=1,0 -23310=1,2147483647 -23311=1,2
+Crop Slice_14 1 1 images_splitncnn_2 477 -23309=1,0 -23310=1,2147483647 -23311=1,1
+Crop Slice_19 1 1 477 482 -23309=1,1 -23310=1,2147483647 -23311=1,2
+Crop Slice_24 1 1 images_splitncnn_1 487 -23309=1,1 -23310=1,2147483647 -23311=1,1
+Crop Slice_29 1 1 487 492 -23309=1,0 -23310=1,2147483647 -23311=1,2
+Crop Slice_34 1 1 images_splitncnn_0 497 -23309=1,1 -23310=1,2147483647 -23311=1,1
+Crop Slice_39 1 1 497 502 -23309=1,1 -23310=1,2147483647 -23311=1,2
+Concat Concat_40 4 1 472 492 482 502 503 0=0
+...
+```
+* Change the first number from 235 to 235 - 9 = 226 (we remove 10 layers and add 1, so the total layer count decreases by 9).
+* Then remove the 10 lines from Split to Concat, but remember the second-to-last number on the Concat line: 503.
+* Add a YoloV5Focus layer after Input (reusing the number 503 noted above):
+```
+YoloV5Focus focus 1 1 images 503
+```
+After (just an example):
+```
+226 328
+Input images 0 1 images
+YoloV5Focus focus 1 1 images 503
+...
+```
+
+## Use ncnn_optimize to generate new param and bin
+```shell
+# suppose you are still under the ncnn/build/tools/onnx dir.
+../ncnnoptimize bytetrack_s.param bytetrack_s.bin bytetrack_s_op.param bytetrack_s_op.bin 65536
+```
+
+## Copy files and build ByteTrack
+Copy or move the 'src' and 'include' folders and the 'CMakeLists.txt' file into ncnn/examples. Copy bytetrack_s_op.param, bytetrack_s_op.bin and /videos/palace.mp4 into ncnn/build/examples. Then build ByteTrack:
+
+```shell
+cd ncnn/build/examples
+cmake ..
+make +``` + +## Run the demo +You can run the ncnn demo with **5 FPS** (96-core Intel(R) Xeon(R) Platinum 8163 CPU @ 2.50GHz): +```shell +./bytetrack palace.mp4 +``` + +You can modify 'num_threads' to optimize the running speed in [bytetrack.cpp](https://github.com/ifzhang/ByteTrack/blob/2e9a67895da6b47b948015f6861bba0bacd4e72f/deploy/ncnn/cpp/src/bytetrack.cpp#L309) according to the number of your CPU cores: + +``` +yolox.opt.num_threads = 20; +``` + + +## Acknowledgement + +* [ncnn](https://github.com/Tencent/ncnn) diff --git a/deploy/ncnn/cpp/include/BYTETracker.h b/deploy/ncnn/cpp/include/BYTETracker.h new file mode 100644 index 0000000000000000000000000000000000000000..e3dda973fa27ccdb85a27841ec2a1cf8dcc1e9b0 --- /dev/null +++ b/deploy/ncnn/cpp/include/BYTETracker.h @@ -0,0 +1,49 @@ +#pragma once + +#include "STrack.h" + +struct Object +{ + cv::Rect_ rect; + int label; + float prob; +}; + +class BYTETracker +{ +public: + BYTETracker(int frame_rate = 30, int track_buffer = 30); + ~BYTETracker(); + + vector update(const vector& objects); + Scalar get_color(int idx); + +private: + vector joint_stracks(vector &tlista, vector &tlistb); + vector joint_stracks(vector &tlista, vector &tlistb); + + vector sub_stracks(vector &tlista, vector &tlistb); + void remove_duplicate_stracks(vector &resa, vector &resb, vector &stracksa, vector &stracksb); + + void linear_assignment(vector > &cost_matrix, int cost_matrix_size, int cost_matrix_size_size, float thresh, + vector > &matches, vector &unmatched_a, vector &unmatched_b); + vector > iou_distance(vector &atracks, vector &btracks, int &dist_size, int &dist_size_size); + vector > iou_distance(vector &atracks, vector &btracks); + vector > ious(vector > &atlbrs, vector > &btlbrs); + + double lapjv(const vector > &cost, vector &rowsol, vector &colsol, + bool extend_cost = false, float cost_limit = LONG_MAX, bool return_cost = true); + +private: + + float track_thresh; + float high_thresh; + float match_thresh; + int frame_id; + int max_time_lost; + + vector tracked_stracks; + vector lost_stracks; + vector removed_stracks; + byte_kalman::KalmanFilter kalman_filter; +}; \ No newline at end of file diff --git a/deploy/ncnn/cpp/include/STrack.h b/deploy/ncnn/cpp/include/STrack.h new file mode 100644 index 0000000000000000000000000000000000000000..752cbefa8f7f7f4f0aff08e0e28ff036afe7d61a --- /dev/null +++ b/deploy/ncnn/cpp/include/STrack.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include "kalmanFilter.h" + +using namespace cv; +using namespace std; + +enum TrackState { New = 0, Tracked, Lost, Removed }; + +class STrack +{ +public: + STrack(vector tlwh_, float score); + ~STrack(); + + vector static tlbr_to_tlwh(vector &tlbr); + void static multi_predict(vector &stracks, byte_kalman::KalmanFilter &kalman_filter); + void static_tlwh(); + void static_tlbr(); + vector tlwh_to_xyah(vector tlwh_tmp); + vector to_xyah(); + void mark_lost(); + void mark_removed(); + int next_id(); + int end_frame(); + + void activate(byte_kalman::KalmanFilter &kalman_filter, int frame_id); + void re_activate(STrack &new_track, int frame_id, bool new_id = false); + void update(STrack &new_track, int frame_id); + +public: + bool is_activated; + int track_id; + int state; + + vector _tlwh; + vector tlwh; + vector tlbr; + int frame_id; + int tracklet_len; + int start_frame; + + KAL_MEAN mean; + KAL_COVA covariance; + float score; + +private: + byte_kalman::KalmanFilter kalman_filter; +}; \ No newline at end of file diff --git a/deploy/ncnn/cpp/include/dataType.h 
b/deploy/ncnn/cpp/include/dataType.h new file mode 100644 index 0000000000000000000000000000000000000000..a7821a395c1c03db137587b879b255846fb0ca16 --- /dev/null +++ b/deploy/ncnn/cpp/include/dataType.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include + +#include +#include +typedef Eigen::Matrix DETECTBOX; +typedef Eigen::Matrix DETECTBOXSS; +typedef Eigen::Matrix FEATURE; +typedef Eigen::Matrix FEATURESS; +//typedef std::vector FEATURESS; + +//Kalmanfilter +//typedef Eigen::Matrix KAL_FILTER; +typedef Eigen::Matrix KAL_MEAN; +typedef Eigen::Matrix KAL_COVA; +typedef Eigen::Matrix KAL_HMEAN; +typedef Eigen::Matrix KAL_HCOVA; +using KAL_DATA = std::pair; +using KAL_HDATA = std::pair; + +//main +using RESULT_DATA = std::pair; + +//tracker: +using TRACKER_DATA = std::pair; +using MATCH_DATA = std::pair; +typedef struct t { + std::vector matches; + std::vector unmatched_tracks; + std::vector unmatched_detections; +}TRACHER_MATCHD; + +//linear_assignment: +typedef Eigen::Matrix DYNAMICM; \ No newline at end of file diff --git a/deploy/ncnn/cpp/include/kalmanFilter.h b/deploy/ncnn/cpp/include/kalmanFilter.h new file mode 100644 index 0000000000000000000000000000000000000000..6596b54e33de75d1b49a8af9bfbb1f26d00ea786 --- /dev/null +++ b/deploy/ncnn/cpp/include/kalmanFilter.h @@ -0,0 +1,31 @@ +#pragma once + +#include "dataType.h" + +namespace byte_kalman +{ + class KalmanFilter + { + public: + static const double chi2inv95[10]; + KalmanFilter(); + KAL_DATA initiate(const DETECTBOX& measurement); + void predict(KAL_MEAN& mean, KAL_COVA& covariance); + KAL_HDATA project(const KAL_MEAN& mean, const KAL_COVA& covariance); + KAL_DATA update(const KAL_MEAN& mean, + const KAL_COVA& covariance, + const DETECTBOX& measurement); + + Eigen::Matrix gating_distance( + const KAL_MEAN& mean, + const KAL_COVA& covariance, + const std::vector& measurements, + bool only_position = false); + + private: + Eigen::Matrix _motion_mat; + Eigen::Matrix _update_mat; + float _std_weight_position; + float _std_weight_velocity; + }; +} \ No newline at end of file diff --git a/deploy/ncnn/cpp/include/lapjv.h b/deploy/ncnn/cpp/include/lapjv.h new file mode 100644 index 0000000000000000000000000000000000000000..0e34385a647bec225827370ff0041a391e628477 --- /dev/null +++ b/deploy/ncnn/cpp/include/lapjv.h @@ -0,0 +1,63 @@ +#ifndef LAPJV_H +#define LAPJV_H + +#define LARGE 1000000 + +#if !defined TRUE +#define TRUE 1 +#endif +#if !defined FALSE +#define FALSE 0 +#endif + +#define NEW(x, t, n) if ((x = (t *)malloc(sizeof(t) * (n))) == 0) { return -1; } +#define FREE(x) if (x != 0) { free(x); x = 0; } +#define SWAP_INDICES(a, b) { int_t _temp_index = a; a = b; b = _temp_index; } + +#if 0 +#include +#define ASSERT(cond) assert(cond) +#define PRINTF(fmt, ...) printf(fmt, ##__VA_ARGS__) +#define PRINT_COST_ARRAY(a, n) \ + while (1) { \ + printf(#a" = ["); \ + if ((n) > 0) { \ + printf("%f", (a)[0]); \ + for (uint_t j = 1; j < n; j++) { \ + printf(", %f", (a)[j]); \ + } \ + } \ + printf("]\n"); \ + break; \ + } +#define PRINT_INDEX_ARRAY(a, n) \ + while (1) { \ + printf(#a" = ["); \ + if ((n) > 0) { \ + printf("%d", (a)[0]); \ + for (uint_t j = 1; j < n; j++) { \ + printf(", %d", (a)[j]); \ + } \ + } \ + printf("]\n"); \ + break; \ + } +#else +#define ASSERT(cond) +#define PRINTF(fmt, ...) 
+#define PRINT_COST_ARRAY(a, n) +#define PRINT_INDEX_ARRAY(a, n) +#endif + + +typedef signed int int_t; +typedef unsigned int uint_t; +typedef double cost_t; +typedef char boolean; +typedef enum fp_t { FP_1 = 1, FP_2 = 2, FP_DYNAMIC = 3 } fp_t; + +extern int_t lapjv_internal( + const uint_t n, cost_t *cost[], + int_t *x, int_t *y); + +#endif // LAPJV_H \ No newline at end of file diff --git a/deploy/ncnn/cpp/src/BYTETracker.cpp b/deploy/ncnn/cpp/src/BYTETracker.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7c936b81f2e95f335ec90b8c355360bc0ebee800 --- /dev/null +++ b/deploy/ncnn/cpp/src/BYTETracker.cpp @@ -0,0 +1,241 @@ +#include "BYTETracker.h" +#include + +BYTETracker::BYTETracker(int frame_rate, int track_buffer) +{ + track_thresh = 0.5; + high_thresh = 0.6; + match_thresh = 0.8; + + frame_id = 0; + max_time_lost = int(frame_rate / 30.0 * track_buffer); + cout << "Init ByteTrack!" << endl; +} + +BYTETracker::~BYTETracker() +{ +} + +vector BYTETracker::update(const vector& objects) +{ + + ////////////////// Step 1: Get detections ////////////////// + this->frame_id++; + vector activated_stracks; + vector refind_stracks; + vector removed_stracks; + vector lost_stracks; + vector detections; + vector detections_low; + + vector detections_cp; + vector tracked_stracks_swap; + vector resa, resb; + vector output_stracks; + + vector unconfirmed; + vector tracked_stracks; + vector strack_pool; + vector r_tracked_stracks; + + if (objects.size() > 0) + { + for (int i = 0; i < objects.size(); i++) + { + vector tlbr_; + tlbr_.resize(4); + tlbr_[0] = objects[i].rect.x; + tlbr_[1] = objects[i].rect.y; + tlbr_[2] = objects[i].rect.x + objects[i].rect.width; + tlbr_[3] = objects[i].rect.y + objects[i].rect.height; + + float score = objects[i].prob; + + STrack strack(STrack::tlbr_to_tlwh(tlbr_), score); + if (score >= track_thresh) + { + detections.push_back(strack); + } + else + { + detections_low.push_back(strack); + } + + } + } + + // Add newly detected tracklets to tracked_stracks + for (int i = 0; i < this->tracked_stracks.size(); i++) + { + if (!this->tracked_stracks[i].is_activated) + unconfirmed.push_back(&this->tracked_stracks[i]); + else + tracked_stracks.push_back(&this->tracked_stracks[i]); + } + + ////////////////// Step 2: First association, with IoU ////////////////// + strack_pool = joint_stracks(tracked_stracks, this->lost_stracks); + STrack::multi_predict(strack_pool, this->kalman_filter); + + vector > dists; + int dist_size = 0, dist_size_size = 0; + dists = iou_distance(strack_pool, detections, dist_size, dist_size_size); + + vector > matches; + vector u_track, u_detection; + linear_assignment(dists, dist_size, dist_size_size, match_thresh, matches, u_track, u_detection); + + for (int i = 0; i < matches.size(); i++) + { + STrack *track = strack_pool[matches[i][0]]; + STrack *det = &detections[matches[i][1]]; + if (track->state == TrackState::Tracked) + { + track->update(*det, this->frame_id); + activated_stracks.push_back(*track); + } + else + { + track->re_activate(*det, this->frame_id, false); + refind_stracks.push_back(*track); + } + } + + ////////////////// Step 3: Second association, using low score dets ////////////////// + for (int i = 0; i < u_detection.size(); i++) + { + detections_cp.push_back(detections[u_detection[i]]); + } + detections.clear(); + detections.assign(detections_low.begin(), detections_low.end()); + + for (int i = 0; i < u_track.size(); i++) + { + if (strack_pool[u_track[i]]->state == TrackState::Tracked) + { + 
r_tracked_stracks.push_back(strack_pool[u_track[i]]); + } + } + + dists.clear(); + dists = iou_distance(r_tracked_stracks, detections, dist_size, dist_size_size); + + matches.clear(); + u_track.clear(); + u_detection.clear(); + linear_assignment(dists, dist_size, dist_size_size, 0.5, matches, u_track, u_detection); + + for (int i = 0; i < matches.size(); i++) + { + STrack *track = r_tracked_stracks[matches[i][0]]; + STrack *det = &detections[matches[i][1]]; + if (track->state == TrackState::Tracked) + { + track->update(*det, this->frame_id); + activated_stracks.push_back(*track); + } + else + { + track->re_activate(*det, this->frame_id, false); + refind_stracks.push_back(*track); + } + } + + for (int i = 0; i < u_track.size(); i++) + { + STrack *track = r_tracked_stracks[u_track[i]]; + if (track->state != TrackState::Lost) + { + track->mark_lost(); + lost_stracks.push_back(*track); + } + } + + // Deal with unconfirmed tracks, usually tracks with only one beginning frame + detections.clear(); + detections.assign(detections_cp.begin(), detections_cp.end()); + + dists.clear(); + dists = iou_distance(unconfirmed, detections, dist_size, dist_size_size); + + matches.clear(); + vector u_unconfirmed; + u_detection.clear(); + linear_assignment(dists, dist_size, dist_size_size, 0.7, matches, u_unconfirmed, u_detection); + + for (int i = 0; i < matches.size(); i++) + { + unconfirmed[matches[i][0]]->update(detections[matches[i][1]], this->frame_id); + activated_stracks.push_back(*unconfirmed[matches[i][0]]); + } + + for (int i = 0; i < u_unconfirmed.size(); i++) + { + STrack *track = unconfirmed[u_unconfirmed[i]]; + track->mark_removed(); + removed_stracks.push_back(*track); + } + + ////////////////// Step 4: Init new stracks ////////////////// + for (int i = 0; i < u_detection.size(); i++) + { + STrack *track = &detections[u_detection[i]]; + if (track->score < this->high_thresh) + continue; + track->activate(this->kalman_filter, this->frame_id); + activated_stracks.push_back(*track); + } + + ////////////////// Step 5: Update state ////////////////// + for (int i = 0; i < this->lost_stracks.size(); i++) + { + if (this->frame_id - this->lost_stracks[i].end_frame() > this->max_time_lost) + { + this->lost_stracks[i].mark_removed(); + removed_stracks.push_back(this->lost_stracks[i]); + } + } + + for (int i = 0; i < this->tracked_stracks.size(); i++) + { + if (this->tracked_stracks[i].state == TrackState::Tracked) + { + tracked_stracks_swap.push_back(this->tracked_stracks[i]); + } + } + this->tracked_stracks.clear(); + this->tracked_stracks.assign(tracked_stracks_swap.begin(), tracked_stracks_swap.end()); + + this->tracked_stracks = joint_stracks(this->tracked_stracks, activated_stracks); + this->tracked_stracks = joint_stracks(this->tracked_stracks, refind_stracks); + + //std::cout << activated_stracks.size() << std::endl; + + this->lost_stracks = sub_stracks(this->lost_stracks, this->tracked_stracks); + for (int i = 0; i < lost_stracks.size(); i++) + { + this->lost_stracks.push_back(lost_stracks[i]); + } + + this->lost_stracks = sub_stracks(this->lost_stracks, this->removed_stracks); + for (int i = 0; i < removed_stracks.size(); i++) + { + this->removed_stracks.push_back(removed_stracks[i]); + } + + remove_duplicate_stracks(resa, resb, this->tracked_stracks, this->lost_stracks); + + this->tracked_stracks.clear(); + this->tracked_stracks.assign(resa.begin(), resa.end()); + this->lost_stracks.clear(); + this->lost_stracks.assign(resb.begin(), resb.end()); + + for (int i = 0; i < 
this->tracked_stracks.size(); i++) + { + if (this->tracked_stracks[i].is_activated) + { + output_stracks.push_back(this->tracked_stracks[i]); + } + } + return output_stracks; +} \ No newline at end of file diff --git a/deploy/ncnn/cpp/src/STrack.cpp b/deploy/ncnn/cpp/src/STrack.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8306165304355fe6d3d6e244207211757f21a646 --- /dev/null +++ b/deploy/ncnn/cpp/src/STrack.cpp @@ -0,0 +1,192 @@ +#include "STrack.h" + +STrack::STrack(vector tlwh_, float score) +{ + _tlwh.resize(4); + _tlwh.assign(tlwh_.begin(), tlwh_.end()); + + is_activated = false; + track_id = 0; + state = TrackState::New; + + tlwh.resize(4); + tlbr.resize(4); + + static_tlwh(); + static_tlbr(); + frame_id = 0; + tracklet_len = 0; + this->score = score; + start_frame = 0; +} + +STrack::~STrack() +{ +} + +void STrack::activate(byte_kalman::KalmanFilter &kalman_filter, int frame_id) +{ + this->kalman_filter = kalman_filter; + this->track_id = this->next_id(); + + vector _tlwh_tmp(4); + _tlwh_tmp[0] = this->_tlwh[0]; + _tlwh_tmp[1] = this->_tlwh[1]; + _tlwh_tmp[2] = this->_tlwh[2]; + _tlwh_tmp[3] = this->_tlwh[3]; + vector xyah = tlwh_to_xyah(_tlwh_tmp); + DETECTBOX xyah_box; + xyah_box[0] = xyah[0]; + xyah_box[1] = xyah[1]; + xyah_box[2] = xyah[2]; + xyah_box[3] = xyah[3]; + auto mc = this->kalman_filter.initiate(xyah_box); + this->mean = mc.first; + this->covariance = mc.second; + + static_tlwh(); + static_tlbr(); + + this->tracklet_len = 0; + this->state = TrackState::Tracked; + if (frame_id == 1) + { + this->is_activated = true; + } + //this->is_activated = true; + this->frame_id = frame_id; + this->start_frame = frame_id; +} + +void STrack::re_activate(STrack &new_track, int frame_id, bool new_id) +{ + vector xyah = tlwh_to_xyah(new_track.tlwh); + DETECTBOX xyah_box; + xyah_box[0] = xyah[0]; + xyah_box[1] = xyah[1]; + xyah_box[2] = xyah[2]; + xyah_box[3] = xyah[3]; + auto mc = this->kalman_filter.update(this->mean, this->covariance, xyah_box); + this->mean = mc.first; + this->covariance = mc.second; + + static_tlwh(); + static_tlbr(); + + this->tracklet_len = 0; + this->state = TrackState::Tracked; + this->is_activated = true; + this->frame_id = frame_id; + this->score = new_track.score; + if (new_id) + this->track_id = next_id(); +} + +void STrack::update(STrack &new_track, int frame_id) +{ + this->frame_id = frame_id; + this->tracklet_len++; + + vector xyah = tlwh_to_xyah(new_track.tlwh); + DETECTBOX xyah_box; + xyah_box[0] = xyah[0]; + xyah_box[1] = xyah[1]; + xyah_box[2] = xyah[2]; + xyah_box[3] = xyah[3]; + + auto mc = this->kalman_filter.update(this->mean, this->covariance, xyah_box); + this->mean = mc.first; + this->covariance = mc.second; + + static_tlwh(); + static_tlbr(); + + this->state = TrackState::Tracked; + this->is_activated = true; + + this->score = new_track.score; +} + +void STrack::static_tlwh() +{ + if (this->state == TrackState::New) + { + tlwh[0] = _tlwh[0]; + tlwh[1] = _tlwh[1]; + tlwh[2] = _tlwh[2]; + tlwh[3] = _tlwh[3]; + return; + } + + tlwh[0] = mean[0]; + tlwh[1] = mean[1]; + tlwh[2] = mean[2]; + tlwh[3] = mean[3]; + + tlwh[2] *= tlwh[3]; + tlwh[0] -= tlwh[2] / 2; + tlwh[1] -= tlwh[3] / 2; +} + +void STrack::static_tlbr() +{ + tlbr.clear(); + tlbr.assign(tlwh.begin(), tlwh.end()); + tlbr[2] += tlbr[0]; + tlbr[3] += tlbr[1]; +} + +vector STrack::tlwh_to_xyah(vector tlwh_tmp) +{ + vector tlwh_output = tlwh_tmp; + tlwh_output[0] += tlwh_output[2] / 2; + tlwh_output[1] += tlwh_output[3] / 2; + tlwh_output[2] /= tlwh_output[3]; + 
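+    // Editor's note (added comment): the returned vector is the (center-x, center-y,
+    // aspect-ratio w/h, height) "xyah" box layout that the Kalman filter state expects.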
return tlwh_output; +} + +vector STrack::to_xyah() +{ + return tlwh_to_xyah(tlwh); +} + +vector STrack::tlbr_to_tlwh(vector &tlbr) +{ + tlbr[2] -= tlbr[0]; + tlbr[3] -= tlbr[1]; + return tlbr; +} + +void STrack::mark_lost() +{ + state = TrackState::Lost; +} + +void STrack::mark_removed() +{ + state = TrackState::Removed; +} + +int STrack::next_id() +{ + static int _count = 0; + _count++; + return _count; +} + +int STrack::end_frame() +{ + return this->frame_id; +} + +void STrack::multi_predict(vector &stracks, byte_kalman::KalmanFilter &kalman_filter) +{ + for (int i = 0; i < stracks.size(); i++) + { + if (stracks[i]->state != TrackState::Tracked) + { + stracks[i]->mean[7] = 0; + } + kalman_filter.predict(stracks[i]->mean, stracks[i]->covariance); + } +} \ No newline at end of file diff --git a/deploy/ncnn/cpp/src/bytetrack.cpp b/deploy/ncnn/cpp/src/bytetrack.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a129f146dd8faa3570bb590555e98a23bd9e4d23 --- /dev/null +++ b/deploy/ncnn/cpp/src/bytetrack.cpp @@ -0,0 +1,396 @@ +#include "layer.h" +#include "net.h" + +#if defined(USE_NCNN_SIMPLEOCV) +#include "simpleocv.h" +#include +#else +#include +#include +#include +#include +#endif +#include +#include +#include +#include +#include "BYTETracker.h" + +#define YOLOX_NMS_THRESH 0.7 // nms threshold +#define YOLOX_CONF_THRESH 0.1 // threshold of bounding box prob +#define INPUT_W 1088 // target image size w after resize +#define INPUT_H 608 // target image size h after resize + +Mat static_resize(Mat& img) { + float r = min(INPUT_W / (img.cols*1.0), INPUT_H / (img.rows*1.0)); + // r = std::min(r, 1.0f); + int unpad_w = r * img.cols; + int unpad_h = r * img.rows; + Mat re(unpad_h, unpad_w, CV_8UC3); + resize(img, re, re.size()); + Mat out(INPUT_H, INPUT_W, CV_8UC3, Scalar(114, 114, 114)); + re.copyTo(out(Rect(0, 0, re.cols, re.rows))); + return out; +} + +// YOLOX use the same focus in yolov5 +class YoloV5Focus : public ncnn::Layer +{ +public: + YoloV5Focus() + { + one_blob_only = true; + } + + virtual int forward(const ncnn::Mat& bottom_blob, ncnn::Mat& top_blob, const ncnn::Option& opt) const + { + int w = bottom_blob.w; + int h = bottom_blob.h; + int channels = bottom_blob.c; + + int outw = w / 2; + int outh = h / 2; + int outc = channels * 4; + + top_blob.create(outw, outh, outc, 4u, 1, opt.blob_allocator); + if (top_blob.empty()) + return -100; + + #pragma omp parallel for num_threads(opt.num_threads) + for (int p = 0; p < outc; p++) + { + const float* ptr = bottom_blob.channel(p % channels).row((p / channels) % 2) + ((p / channels) / 2); + float* outptr = top_blob.channel(p); + + for (int i = 0; i < outh; i++) + { + for (int j = 0; j < outw; j++) + { + *outptr = *ptr; + + outptr += 1; + ptr += 2; + } + + ptr += w; + } + } + + return 0; + } +}; + +DEFINE_LAYER_CREATOR(YoloV5Focus) + +struct GridAndStride +{ + int grid0; + int grid1; + int stride; +}; + +static inline float intersection_area(const Object& a, const Object& b) +{ + cv::Rect_ inter = a.rect & b.rect; + return inter.area(); +} + +static void qsort_descent_inplace(std::vector& faceobjects, int left, int right) +{ + int i = left; + int j = right; + float p = faceobjects[(left + right) / 2].prob; + + while (i <= j) + { + while (faceobjects[i].prob > p) + i++; + + while (faceobjects[j].prob < p) + j--; + + if (i <= j) + { + // swap + std::swap(faceobjects[i], faceobjects[j]); + + i++; + j--; + } + } + + #pragma omp parallel sections + { + #pragma omp section + { + if (left < j) qsort_descent_inplace(faceobjects, 
left, j); + } + #pragma omp section + { + if (i < right) qsort_descent_inplace(faceobjects, i, right); + } + } +} + +static void qsort_descent_inplace(std::vector& objects) +{ + if (objects.empty()) + return; + + qsort_descent_inplace(objects, 0, objects.size() - 1); +} + +static void nms_sorted_bboxes(const std::vector& faceobjects, std::vector& picked, float nms_threshold) +{ + picked.clear(); + + const int n = faceobjects.size(); + + std::vector areas(n); + for (int i = 0; i < n; i++) + { + areas[i] = faceobjects[i].rect.area(); + } + + for (int i = 0; i < n; i++) + { + const Object& a = faceobjects[i]; + + int keep = 1; + for (int j = 0; j < (int)picked.size(); j++) + { + const Object& b = faceobjects[picked[j]]; + + // intersection over union + float inter_area = intersection_area(a, b); + float union_area = areas[i] + areas[picked[j]] - inter_area; + // float IoU = inter_area / union_area + if (inter_area / union_area > nms_threshold) + keep = 0; + } + + if (keep) + picked.push_back(i); + } +} + +static void generate_grids_and_stride(const int target_w, const int target_h, std::vector& strides, std::vector& grid_strides) +{ + for (int i = 0; i < (int)strides.size(); i++) + { + int stride = strides[i]; + int num_grid_w = target_w / stride; + int num_grid_h = target_h / stride; + for (int g1 = 0; g1 < num_grid_h; g1++) + { + for (int g0 = 0; g0 < num_grid_w; g0++) + { + GridAndStride gs; + gs.grid0 = g0; + gs.grid1 = g1; + gs.stride = stride; + grid_strides.push_back(gs); + } + } + } +} + +static void generate_yolox_proposals(std::vector grid_strides, const ncnn::Mat& feat_blob, float prob_threshold, std::vector& objects) +{ + const int num_grid = feat_blob.h; + const int num_class = feat_blob.w - 5; + const int num_anchors = grid_strides.size(); + + const float* feat_ptr = feat_blob.channel(0); + for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++) + { + const int grid0 = grid_strides[anchor_idx].grid0; + const int grid1 = grid_strides[anchor_idx].grid1; + const int stride = grid_strides[anchor_idx].stride; + + // yolox/models/yolo_head.py decode logic + // outputs[..., :2] = (outputs[..., :2] + grids) * strides + // outputs[..., 2:4] = torch.exp(outputs[..., 2:4]) * strides + float x_center = (feat_ptr[0] + grid0) * stride; + float y_center = (feat_ptr[1] + grid1) * stride; + float w = exp(feat_ptr[2]) * stride; + float h = exp(feat_ptr[3]) * stride; + float x0 = x_center - w * 0.5f; + float y0 = y_center - h * 0.5f; + + float box_objectness = feat_ptr[4]; + for (int class_idx = 0; class_idx < num_class; class_idx++) + { + float box_cls_score = feat_ptr[5 + class_idx]; + float box_prob = box_objectness * box_cls_score; + if (box_prob > prob_threshold) + { + Object obj; + obj.rect.x = x0; + obj.rect.y = y0; + obj.rect.width = w; + obj.rect.height = h; + obj.label = class_idx; + obj.prob = box_prob; + + objects.push_back(obj); + } + + } // class loop + feat_ptr += feat_blob.w; + + } // point anchor loop +} + +static int detect_yolox(ncnn::Mat& in_pad, std::vector& objects, ncnn::Extractor ex, float scale) +{ + + ex.input("images", in_pad); + + std::vector proposals; + + { + ncnn::Mat out; + ex.extract("output", out); + + static const int stride_arr[] = {8, 16, 32}; // might have stride=64 in YOLOX + std::vector strides(stride_arr, stride_arr + sizeof(stride_arr) / sizeof(stride_arr[0])); + std::vector grid_strides; + generate_grids_and_stride(INPUT_W, INPUT_H, strides, grid_strides); + generate_yolox_proposals(grid_strides, out, YOLOX_CONF_THRESH, proposals); + } + // sort 
all proposals by score from highest to lowest + qsort_descent_inplace(proposals); + + // apply nms with nms_threshold + std::vector picked; + nms_sorted_bboxes(proposals, picked, YOLOX_NMS_THRESH); + + int count = picked.size(); + + objects.resize(count); + for (int i = 0; i < count; i++) + { + objects[i] = proposals[picked[i]]; + + // adjust offset to original unpadded + float x0 = (objects[i].rect.x) / scale; + float y0 = (objects[i].rect.y) / scale; + float x1 = (objects[i].rect.x + objects[i].rect.width) / scale; + float y1 = (objects[i].rect.y + objects[i].rect.height) / scale; + + // clip + // x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f); + // y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f); + // x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f); + // y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f); + + objects[i].rect.x = x0; + objects[i].rect.y = y0; + objects[i].rect.width = x1 - x0; + objects[i].rect.height = y1 - y0; + } + + return 0; +} + +int main(int argc, char** argv) +{ + if (argc != 2) + { + fprintf(stderr, "Usage: %s [videopath]\n", argv[0]); + return -1; + } + + ncnn::Net yolox; + + //yolox.opt.use_vulkan_compute = true; + //yolox.opt.use_bf16_storage = true; + yolox.opt.num_threads = 20; + //ncnn::set_cpu_powersave(0); + + //ncnn::set_omp_dynamic(0); + //ncnn::set_omp_num_threads(20); + + // Focus in yolov5 + yolox.register_custom_layer("YoloV5Focus", YoloV5Focus_layer_creator); + + yolox.load_param("bytetrack_s_op.param"); + yolox.load_model("bytetrack_s_op.bin"); + + ncnn::Extractor ex = yolox.create_extractor(); + + const char* videopath = argv[1]; + + VideoCapture cap(videopath); + if (!cap.isOpened()) + return 0; + + int img_w = cap.get(CV_CAP_PROP_FRAME_WIDTH); + int img_h = cap.get(CV_CAP_PROP_FRAME_HEIGHT); + int fps = cap.get(CV_CAP_PROP_FPS); + long nFrame = static_cast(cap.get(CV_CAP_PROP_FRAME_COUNT)); + cout << "Total frames: " << nFrame << endl; + + VideoWriter writer("demo.mp4", CV_FOURCC('m', 'p', '4', 'v'), fps, Size(img_w, img_h)); + + Mat img; + BYTETracker tracker(fps, 30); + int num_frames = 0; + int total_ms = 1; + for (;;) + { + if(!cap.read(img)) + break; + num_frames ++; + if (num_frames % 20 == 0) + { + cout << "Processing frame " << num_frames << " (" << num_frames * 1000000 / total_ms << " fps)" << endl; + } + if (img.empty()) + break; + + float scale = min(INPUT_W / (img.cols*1.0), INPUT_H / (img.rows*1.0)); + Mat pr_img = static_resize(img); + ncnn::Mat in_pad = ncnn::Mat::from_pixels_resize(pr_img.data, ncnn::Mat::PIXEL_BGR2RGB, INPUT_W, INPUT_H, INPUT_W, INPUT_H); + + // python 0-1 input tensor with rgb_means = (0.485, 0.456, 0.406), std = (0.229, 0.224, 0.225) + // so for 0-255 input image, rgb_mean should multiply 255 and norm should div by std. 
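+        // Editor's note (added comment): with the ImageNet statistics above these work out to roughly
+        //   mean_vals ~ {123.675f, 116.28f, 103.53f}
+        //   norm_vals ~ {1/58.395f, 1/57.12f, 1/57.375f} ~ {0.01712f, 0.01751f, 0.01743f}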
+ const float mean_vals[3] = {255.f * 0.485f, 255.f * 0.456, 255.f * 0.406f}; + const float norm_vals[3] = {1 / (255.f * 0.229f), 1 / (255.f * 0.224f), 1 / (255.f * 0.225f)}; + + in_pad.substract_mean_normalize(mean_vals, norm_vals); + + std::vector objects; + auto start = chrono::system_clock::now(); + //detect_yolox(img, objects); + detect_yolox(in_pad, objects, ex, scale); + vector output_stracks = tracker.update(objects); + auto end = chrono::system_clock::now(); + total_ms = total_ms + chrono::duration_cast(end - start).count(); + for (int i = 0; i < output_stracks.size(); i++) + { + vector tlwh = output_stracks[i].tlwh; + bool vertical = tlwh[2] / tlwh[3] > 1.6; + if (tlwh[2] * tlwh[3] > 20 && !vertical) + { + Scalar s = tracker.get_color(output_stracks[i].track_id); + putText(img, format("%d", output_stracks[i].track_id), Point(tlwh[0], tlwh[1] - 5), + 0, 0.6, Scalar(0, 0, 255), 2, LINE_AA); + rectangle(img, Rect(tlwh[0], tlwh[1], tlwh[2], tlwh[3]), s, 2); + } + } + putText(img, format("frame: %d fps: %d num: %d", num_frames, num_frames * 1000000 / total_ms, output_stracks.size()), + Point(0, 30), 0, 0.6, Scalar(0, 0, 255), 2, LINE_AA); + writer.write(img); + char c = waitKey(1); + if (c > 0) + { + break; + } + } + cap.release(); + cout << "FPS: " << num_frames * 1000000 / total_ms << endl; + + return 0; +} diff --git a/deploy/ncnn/cpp/src/kalmanFilter.cpp b/deploy/ncnn/cpp/src/kalmanFilter.cpp new file mode 100644 index 0000000000000000000000000000000000000000..168432a46810d0c1296f4b17500d41f8b4f308b4 --- /dev/null +++ b/deploy/ncnn/cpp/src/kalmanFilter.cpp @@ -0,0 +1,152 @@ +#include "kalmanFilter.h" +#include + +namespace byte_kalman +{ + const double KalmanFilter::chi2inv95[10] = { + 0, + 3.8415, + 5.9915, + 7.8147, + 9.4877, + 11.070, + 12.592, + 14.067, + 15.507, + 16.919 + }; + KalmanFilter::KalmanFilter() + { + int ndim = 4; + double dt = 1.; + + _motion_mat = Eigen::MatrixXf::Identity(8, 8); + for (int i = 0; i < ndim; i++) { + _motion_mat(i, ndim + i) = dt; + } + _update_mat = Eigen::MatrixXf::Identity(4, 8); + + this->_std_weight_position = 1. / 20; + this->_std_weight_velocity = 1. 
/ 160; + } + + KAL_DATA KalmanFilter::initiate(const DETECTBOX &measurement) + { + DETECTBOX mean_pos = measurement; + DETECTBOX mean_vel; + for (int i = 0; i < 4; i++) mean_vel(i) = 0; + + KAL_MEAN mean; + for (int i = 0; i < 8; i++) { + if (i < 4) mean(i) = mean_pos(i); + else mean(i) = mean_vel(i - 4); + } + + KAL_MEAN std; + std(0) = 2 * _std_weight_position * measurement[3]; + std(1) = 2 * _std_weight_position * measurement[3]; + std(2) = 1e-2; + std(3) = 2 * _std_weight_position * measurement[3]; + std(4) = 10 * _std_weight_velocity * measurement[3]; + std(5) = 10 * _std_weight_velocity * measurement[3]; + std(6) = 1e-5; + std(7) = 10 * _std_weight_velocity * measurement[3]; + + KAL_MEAN tmp = std.array().square(); + KAL_COVA var = tmp.asDiagonal(); + return std::make_pair(mean, var); + } + + void KalmanFilter::predict(KAL_MEAN &mean, KAL_COVA &covariance) + { + //revise the data; + DETECTBOX std_pos; + std_pos << _std_weight_position * mean(3), + _std_weight_position * mean(3), + 1e-2, + _std_weight_position * mean(3); + DETECTBOX std_vel; + std_vel << _std_weight_velocity * mean(3), + _std_weight_velocity * mean(3), + 1e-5, + _std_weight_velocity * mean(3); + KAL_MEAN tmp; + tmp.block<1, 4>(0, 0) = std_pos; + tmp.block<1, 4>(0, 4) = std_vel; + tmp = tmp.array().square(); + KAL_COVA motion_cov = tmp.asDiagonal(); + KAL_MEAN mean1 = this->_motion_mat * mean.transpose(); + KAL_COVA covariance1 = this->_motion_mat * covariance *(_motion_mat.transpose()); + covariance1 += motion_cov; + + mean = mean1; + covariance = covariance1; + } + + KAL_HDATA KalmanFilter::project(const KAL_MEAN &mean, const KAL_COVA &covariance) + { + DETECTBOX std; + std << _std_weight_position * mean(3), _std_weight_position * mean(3), + 1e-1, _std_weight_position * mean(3); + KAL_HMEAN mean1 = _update_mat * mean.transpose(); + KAL_HCOVA covariance1 = _update_mat * covariance * (_update_mat.transpose()); + Eigen::Matrix diag = std.asDiagonal(); + diag = diag.array().square().matrix(); + covariance1 += diag; + // covariance1.diagonal() << diag; + return std::make_pair(mean1, covariance1); + } + + KAL_DATA + KalmanFilter::update( + const KAL_MEAN &mean, + const KAL_COVA &covariance, + const DETECTBOX &measurement) + { + KAL_HDATA pa = project(mean, covariance); + KAL_HMEAN projected_mean = pa.first; + KAL_HCOVA projected_cov = pa.second; + + //chol_factor, lower = + //scipy.linalg.cho_factor(projected_cov, lower=True, check_finite=False) + //kalmain_gain = + //scipy.linalg.cho_solve((cho_factor, lower), + //np.dot(covariance, self._upadte_mat.T).T, + //check_finite=False).T + Eigen::Matrix B = (covariance * (_update_mat.transpose())).transpose(); + Eigen::Matrix kalman_gain = (projected_cov.llt().solve(B)).transpose(); // eg.8x4 + Eigen::Matrix innovation = measurement - projected_mean; //eg.1x4 + auto tmp = innovation * (kalman_gain.transpose()); + KAL_MEAN new_mean = (mean.array() + tmp.array()).matrix(); + KAL_COVA new_covariance = covariance - kalman_gain * projected_cov*(kalman_gain.transpose()); + return std::make_pair(new_mean, new_covariance); + } + + Eigen::Matrix + KalmanFilter::gating_distance( + const KAL_MEAN &mean, + const KAL_COVA &covariance, + const std::vector &measurements, + bool only_position) + { + KAL_HDATA pa = this->project(mean, covariance); + if (only_position) { + printf("not implement!"); + exit(0); + } + KAL_HMEAN mean1 = pa.first; + KAL_HCOVA covariance1 = pa.second; + + // Eigen::Matrix d(size, 4); + DETECTBOXSS d(measurements.size(), 4); + int pos = 0; + for (DETECTBOX box : 
measurements) { + d.row(pos++) = box - mean1; + } + Eigen::Matrix factor = covariance1.llt().matrixL(); + Eigen::Matrix z = factor.triangularView().solve(d).transpose(); + auto zz = ((z.array())*(z.array())).matrix(); + auto square_maha = zz.colwise().sum(); + return square_maha; + } +} \ No newline at end of file diff --git a/deploy/ncnn/cpp/src/lapjv.cpp b/deploy/ncnn/cpp/src/lapjv.cpp new file mode 100644 index 0000000000000000000000000000000000000000..169efd51f915adf8c666f3f4978f1cb7b2d3e1b3 --- /dev/null +++ b/deploy/ncnn/cpp/src/lapjv.cpp @@ -0,0 +1,343 @@ +#include +#include +#include + +#include "lapjv.h" + +/** Column-reduction and reduction transfer for a dense cost matrix. + */ +int_t _ccrrt_dense(const uint_t n, cost_t *cost[], + int_t *free_rows, int_t *x, int_t *y, cost_t *v) +{ + int_t n_free_rows; + boolean *unique; + + for (uint_t i = 0; i < n; i++) { + x[i] = -1; + v[i] = LARGE; + y[i] = 0; + } + for (uint_t i = 0; i < n; i++) { + for (uint_t j = 0; j < n; j++) { + const cost_t c = cost[i][j]; + if (c < v[j]) { + v[j] = c; + y[j] = i; + } + PRINTF("i=%d, j=%d, c[i,j]=%f, v[j]=%f y[j]=%d\n", i, j, c, v[j], y[j]); + } + } + PRINT_COST_ARRAY(v, n); + PRINT_INDEX_ARRAY(y, n); + NEW(unique, boolean, n); + memset(unique, TRUE, n); + { + int_t j = n; + do { + j--; + const int_t i = y[j]; + if (x[i] < 0) { + x[i] = j; + } + else { + unique[i] = FALSE; + y[j] = -1; + } + } while (j > 0); + } + n_free_rows = 0; + for (uint_t i = 0; i < n; i++) { + if (x[i] < 0) { + free_rows[n_free_rows++] = i; + } + else if (unique[i]) { + const int_t j = x[i]; + cost_t min = LARGE; + for (uint_t j2 = 0; j2 < n; j2++) { + if (j2 == (uint_t)j) { + continue; + } + const cost_t c = cost[i][j2] - v[j2]; + if (c < min) { + min = c; + } + } + PRINTF("v[%d] = %f - %f\n", j, v[j], min); + v[j] -= min; + } + } + FREE(unique); + return n_free_rows; +} + + +/** Augmenting row reduction for a dense cost matrix. + */ +int_t _carr_dense( + const uint_t n, cost_t *cost[], + const uint_t n_free_rows, + int_t *free_rows, int_t *x, int_t *y, cost_t *v) +{ + uint_t current = 0; + int_t new_free_rows = 0; + uint_t rr_cnt = 0; + PRINT_INDEX_ARRAY(x, n); + PRINT_INDEX_ARRAY(y, n); + PRINT_COST_ARRAY(v, n); + PRINT_INDEX_ARRAY(free_rows, n_free_rows); + while (current < n_free_rows) { + int_t i0; + int_t j1, j2; + cost_t v1, v2, v1_new; + boolean v1_lowers; + + rr_cnt++; + PRINTF("current = %d rr_cnt = %d\n", current, rr_cnt); + const int_t free_i = free_rows[current++]; + j1 = 0; + v1 = cost[free_i][0] - v[0]; + j2 = -1; + v2 = LARGE; + for (uint_t j = 1; j < n; j++) { + PRINTF("%d = %f %d = %f\n", j1, v1, j2, v2); + const cost_t c = cost[free_i][j] - v[j]; + if (c < v2) { + if (c >= v1) { + v2 = c; + j2 = j; + } + else { + v2 = v1; + v1 = c; + j2 = j1; + j1 = j; + } + } + } + i0 = y[j1]; + v1_new = v[j1] - (v2 - v1); + v1_lowers = v1_new < v[j1]; + PRINTF("%d %d 1=%d,%f 2=%d,%f v1'=%f(%d,%g) \n", free_i, i0, j1, v1, j2, v2, v1_new, v1_lowers, v[j1] - v1_new); + if (rr_cnt < current * n) { + if (v1_lowers) { + v[j1] = v1_new; + } + else if (i0 >= 0 && j2 >= 0) { + j1 = j2; + i0 = y[j2]; + } + if (i0 >= 0) { + if (v1_lowers) { + free_rows[--current] = i0; + } + else { + free_rows[new_free_rows++] = i0; + } + } + } + else { + PRINTF("rr_cnt=%d >= %d (current=%d * n=%d)\n", rr_cnt, current * n, current, n); + if (i0 >= 0) { + free_rows[new_free_rows++] = i0; + } + } + x[free_i] = j1; + y[j1] = free_i; + } + return new_free_rows; +} + + +/** Find columns with minimum d[j] and put them on the SCAN list. 
+ */ +uint_t _find_dense(const uint_t n, uint_t lo, cost_t *d, int_t *cols, int_t *y) +{ + uint_t hi = lo + 1; + cost_t mind = d[cols[lo]]; + for (uint_t k = hi; k < n; k++) { + int_t j = cols[k]; + if (d[j] <= mind) { + if (d[j] < mind) { + hi = lo; + mind = d[j]; + } + cols[k] = cols[hi]; + cols[hi++] = j; + } + } + return hi; +} + + +// Scan all columns in TODO starting from arbitrary column in SCAN +// and try to decrease d of the TODO columns using the SCAN column. +int_t _scan_dense(const uint_t n, cost_t *cost[], + uint_t *plo, uint_t*phi, + cost_t *d, int_t *cols, int_t *pred, + int_t *y, cost_t *v) +{ + uint_t lo = *plo; + uint_t hi = *phi; + cost_t h, cred_ij; + + while (lo != hi) { + int_t j = cols[lo++]; + const int_t i = y[j]; + const cost_t mind = d[j]; + h = cost[i][j] - v[j] - mind; + PRINTF("i=%d j=%d h=%f\n", i, j, h); + // For all columns in TODO + for (uint_t k = hi; k < n; k++) { + j = cols[k]; + cred_ij = cost[i][j] - v[j] - h; + if (cred_ij < d[j]) { + d[j] = cred_ij; + pred[j] = i; + if (cred_ij == mind) { + if (y[j] < 0) { + return j; + } + cols[k] = cols[hi]; + cols[hi++] = j; + } + } + } + } + *plo = lo; + *phi = hi; + return -1; +} + + +/** Single iteration of modified Dijkstra shortest path algorithm as explained in the JV paper. + * + * This is a dense matrix version. + * + * \return The closest free column index. + */ +int_t find_path_dense( + const uint_t n, cost_t *cost[], + const int_t start_i, + int_t *y, cost_t *v, + int_t *pred) +{ + uint_t lo = 0, hi = 0; + int_t final_j = -1; + uint_t n_ready = 0; + int_t *cols; + cost_t *d; + + NEW(cols, int_t, n); + NEW(d, cost_t, n); + + for (uint_t i = 0; i < n; i++) { + cols[i] = i; + pred[i] = start_i; + d[i] = cost[start_i][i] - v[i]; + } + PRINT_COST_ARRAY(d, n); + while (final_j == -1) { + // No columns left on the SCAN list. + if (lo == hi) { + PRINTF("%d..%d -> find\n", lo, hi); + n_ready = lo; + hi = _find_dense(n, lo, d, cols, y); + PRINTF("check %d..%d\n", lo, hi); + PRINT_INDEX_ARRAY(cols, n); + for (uint_t k = lo; k < hi; k++) { + const int_t j = cols[k]; + if (y[j] < 0) { + final_j = j; + } + } + } + if (final_j == -1) { + PRINTF("%d..%d -> scan\n", lo, hi); + final_j = _scan_dense( + n, cost, &lo, &hi, d, cols, pred, y, v); + PRINT_COST_ARRAY(d, n); + PRINT_INDEX_ARRAY(cols, n); + PRINT_INDEX_ARRAY(pred, n); + } + } + + PRINTF("found final_j=%d\n", final_j); + PRINT_INDEX_ARRAY(cols, n); + { + const cost_t mind = d[cols[lo]]; + for (uint_t k = 0; k < n_ready; k++) { + const int_t j = cols[k]; + v[j] += d[j] - mind; + } + } + + FREE(cols); + FREE(d); + + return final_j; +} + + +/** Augment for a dense cost matrix. + */ +int_t _ca_dense( + const uint_t n, cost_t *cost[], + const uint_t n_free_rows, + int_t *free_rows, int_t *x, int_t *y, cost_t *v) +{ + int_t *pred; + + NEW(pred, int_t, n); + + for (int_t *pfree_i = free_rows; pfree_i < free_rows + n_free_rows; pfree_i++) { + int_t i = -1, j; + uint_t k = 0; + + PRINTF("looking at free_i=%d\n", *pfree_i); + j = find_path_dense(n, cost, *pfree_i, y, v, pred); + ASSERT(j >= 0); + ASSERT(j < n); + while (i != *pfree_i) { + PRINTF("augment %d\n", j); + PRINT_INDEX_ARRAY(pred, n); + i = pred[j]; + PRINTF("y[%d]=%d -> %d\n", j, y[j], i); + y[j] = i; + PRINT_INDEX_ARRAY(x, n); + SWAP_INDICES(j, x[i]); + k++; + if (k >= n) { + ASSERT(FALSE); + } + } + } + FREE(pred); + return 0; +} + + +/** Solve dense sparse LAP. 
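+ *
+ *  Editor's note (added): returns 0 on success, or -1 if one of the internal
+ *  working arrays cannot be allocated.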
+ */ +int lapjv_internal( + const uint_t n, cost_t *cost[], + int_t *x, int_t *y) +{ + int ret; + int_t *free_rows; + cost_t *v; + + NEW(free_rows, int_t, n); + NEW(v, cost_t, n); + ret = _ccrrt_dense(n, cost, free_rows, x, y, v); + int i = 0; + while (ret > 0 && i < 2) { + ret = _carr_dense(n, cost, ret, free_rows, x, y, v); + i++; + } + if (ret > 0) { + ret = _ca_dense(n, cost, ret, free_rows, x, y, v); + } + FREE(v); + FREE(free_rows); + return ret; +} \ No newline at end of file diff --git a/deploy/ncnn/cpp/src/utils.cpp b/deploy/ncnn/cpp/src/utils.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4aa0305cd6cf025496528ef9ff49075209fe9e8c --- /dev/null +++ b/deploy/ncnn/cpp/src/utils.cpp @@ -0,0 +1,429 @@ +#include "BYTETracker.h" +#include "lapjv.h" + +vector BYTETracker::joint_stracks(vector &tlista, vector &tlistb) +{ + map exists; + vector res; + for (int i = 0; i < tlista.size(); i++) + { + exists.insert(pair(tlista[i]->track_id, 1)); + res.push_back(tlista[i]); + } + for (int i = 0; i < tlistb.size(); i++) + { + int tid = tlistb[i].track_id; + if (!exists[tid] || exists.count(tid) == 0) + { + exists[tid] = 1; + res.push_back(&tlistb[i]); + } + } + return res; +} + +vector BYTETracker::joint_stracks(vector &tlista, vector &tlistb) +{ + map exists; + vector res; + for (int i = 0; i < tlista.size(); i++) + { + exists.insert(pair(tlista[i].track_id, 1)); + res.push_back(tlista[i]); + } + for (int i = 0; i < tlistb.size(); i++) + { + int tid = tlistb[i].track_id; + if (!exists[tid] || exists.count(tid) == 0) + { + exists[tid] = 1; + res.push_back(tlistb[i]); + } + } + return res; +} + +vector BYTETracker::sub_stracks(vector &tlista, vector &tlistb) +{ + map stracks; + for (int i = 0; i < tlista.size(); i++) + { + stracks.insert(pair(tlista[i].track_id, tlista[i])); + } + for (int i = 0; i < tlistb.size(); i++) + { + int tid = tlistb[i].track_id; + if (stracks.count(tid) != 0) + { + stracks.erase(tid); + } + } + + vector res; + std::map::iterator it; + for (it = stracks.begin(); it != stracks.end(); ++it) + { + res.push_back(it->second); + } + + return res; +} + +void BYTETracker::remove_duplicate_stracks(vector &resa, vector &resb, vector &stracksa, vector &stracksb) +{ + vector > pdist = iou_distance(stracksa, stracksb); + vector > pairs; + for (int i = 0; i < pdist.size(); i++) + { + for (int j = 0; j < pdist[i].size(); j++) + { + if (pdist[i][j] < 0.15) + { + pairs.push_back(pair(i, j)); + } + } + } + + vector dupa, dupb; + for (int i = 0; i < pairs.size(); i++) + { + int timep = stracksa[pairs[i].first].frame_id - stracksa[pairs[i].first].start_frame; + int timeq = stracksb[pairs[i].second].frame_id - stracksb[pairs[i].second].start_frame; + if (timep > timeq) + dupb.push_back(pairs[i].second); + else + dupa.push_back(pairs[i].first); + } + + for (int i = 0; i < stracksa.size(); i++) + { + vector::iterator iter = find(dupa.begin(), dupa.end(), i); + if (iter == dupa.end()) + { + resa.push_back(stracksa[i]); + } + } + + for (int i = 0; i < stracksb.size(); i++) + { + vector::iterator iter = find(dupb.begin(), dupb.end(), i); + if (iter == dupb.end()) + { + resb.push_back(stracksb[i]); + } + } +} + +void BYTETracker::linear_assignment(vector > &cost_matrix, int cost_matrix_size, int cost_matrix_size_size, float thresh, + vector > &matches, vector &unmatched_a, vector &unmatched_b) +{ + if (cost_matrix.size() == 0) + { + for (int i = 0; i < cost_matrix_size; i++) + { + unmatched_a.push_back(i); + } + for (int i = 0; i < cost_matrix_size_size; i++) + { + 
unmatched_b.push_back(i); + } + return; + } + + vector rowsol; vector colsol; + float c = lapjv(cost_matrix, rowsol, colsol, true, thresh); + for (int i = 0; i < rowsol.size(); i++) + { + if (rowsol[i] >= 0) + { + vector match; + match.push_back(i); + match.push_back(rowsol[i]); + matches.push_back(match); + } + else + { + unmatched_a.push_back(i); + } + } + + for (int i = 0; i < colsol.size(); i++) + { + if (colsol[i] < 0) + { + unmatched_b.push_back(i); + } + } +} + +vector > BYTETracker::ious(vector > &atlbrs, vector > &btlbrs) +{ + vector > ious; + if (atlbrs.size()*btlbrs.size() == 0) + return ious; + + ious.resize(atlbrs.size()); + for (int i = 0; i < ious.size(); i++) + { + ious[i].resize(btlbrs.size()); + } + + //bbox_ious + for (int k = 0; k < btlbrs.size(); k++) + { + vector ious_tmp; + float box_area = (btlbrs[k][2] - btlbrs[k][0] + 1)*(btlbrs[k][3] - btlbrs[k][1] + 1); + for (int n = 0; n < atlbrs.size(); n++) + { + float iw = min(atlbrs[n][2], btlbrs[k][2]) - max(atlbrs[n][0], btlbrs[k][0]) + 1; + if (iw > 0) + { + float ih = min(atlbrs[n][3], btlbrs[k][3]) - max(atlbrs[n][1], btlbrs[k][1]) + 1; + if(ih > 0) + { + float ua = (atlbrs[n][2] - atlbrs[n][0] + 1)*(atlbrs[n][3] - atlbrs[n][1] + 1) + box_area - iw * ih; + ious[n][k] = iw * ih / ua; + } + else + { + ious[n][k] = 0.0; + } + } + else + { + ious[n][k] = 0.0; + } + } + } + + return ious; +} + +vector > BYTETracker::iou_distance(vector &atracks, vector &btracks, int &dist_size, int &dist_size_size) +{ + vector > cost_matrix; + if (atracks.size() * btracks.size() == 0) + { + dist_size = atracks.size(); + dist_size_size = btracks.size(); + return cost_matrix; + } + vector > atlbrs, btlbrs; + for (int i = 0; i < atracks.size(); i++) + { + atlbrs.push_back(atracks[i]->tlbr); + } + for (int i = 0; i < btracks.size(); i++) + { + btlbrs.push_back(btracks[i].tlbr); + } + + dist_size = atracks.size(); + dist_size_size = btracks.size(); + + vector > _ious = ious(atlbrs, btlbrs); + + for (int i = 0; i < _ious.size();i++) + { + vector _iou; + for (int j = 0; j < _ious[i].size(); j++) + { + _iou.push_back(1 - _ious[i][j]); + } + cost_matrix.push_back(_iou); + } + + return cost_matrix; +} + +vector > BYTETracker::iou_distance(vector &atracks, vector &btracks) +{ + vector > atlbrs, btlbrs; + for (int i = 0; i < atracks.size(); i++) + { + atlbrs.push_back(atracks[i].tlbr); + } + for (int i = 0; i < btracks.size(); i++) + { + btlbrs.push_back(btracks[i].tlbr); + } + + vector > _ious = ious(atlbrs, btlbrs); + vector > cost_matrix; + for (int i = 0; i < _ious.size(); i++) + { + vector _iou; + for (int j = 0; j < _ious[i].size(); j++) + { + _iou.push_back(1 - _ious[i][j]); + } + cost_matrix.push_back(_iou); + } + + return cost_matrix; +} + +double BYTETracker::lapjv(const vector > &cost, vector &rowsol, vector &colsol, + bool extend_cost, float cost_limit, bool return_cost) +{ + vector > cost_c; + cost_c.assign(cost.begin(), cost.end()); + + vector > cost_c_extended; + + int n_rows = cost.size(); + int n_cols = cost[0].size(); + rowsol.resize(n_rows); + colsol.resize(n_cols); + + int n = 0; + if (n_rows == n_cols) + { + n = n_rows; + } + else + { + if (!extend_cost) + { + cout << "set extend_cost=True" << endl; + system("pause"); + exit(0); + } + } + + if (extend_cost || cost_limit < LONG_MAX) + { + n = n_rows + n_cols; + cost_c_extended.resize(n); + for (int i = 0; i < cost_c_extended.size(); i++) + cost_c_extended[i].resize(n); + + if (cost_limit < LONG_MAX) + { + for (int i = 0; i < cost_c_extended.size(); i++) + { + for (int j = 0; j < 
cost_c_extended[i].size(); j++) + { + cost_c_extended[i][j] = cost_limit / 2.0; + } + } + } + else + { + float cost_max = -1; + for (int i = 0; i < cost_c.size(); i++) + { + for (int j = 0; j < cost_c[i].size(); j++) + { + if (cost_c[i][j] > cost_max) + cost_max = cost_c[i][j]; + } + } + for (int i = 0; i < cost_c_extended.size(); i++) + { + for (int j = 0; j < cost_c_extended[i].size(); j++) + { + cost_c_extended[i][j] = cost_max + 1; + } + } + } + + for (int i = n_rows; i < cost_c_extended.size(); i++) + { + for (int j = n_cols; j < cost_c_extended[i].size(); j++) + { + cost_c_extended[i][j] = 0; + } + } + for (int i = 0; i < n_rows; i++) + { + for (int j = 0; j < n_cols; j++) + { + cost_c_extended[i][j] = cost_c[i][j]; + } + } + + cost_c.clear(); + cost_c.assign(cost_c_extended.begin(), cost_c_extended.end()); + } + + double **cost_ptr; + cost_ptr = new double *[sizeof(double *) * n]; + for (int i = 0; i < n; i++) + cost_ptr[i] = new double[sizeof(double) * n]; + + for (int i = 0; i < n; i++) + { + for (int j = 0; j < n; j++) + { + cost_ptr[i][j] = cost_c[i][j]; + } + } + + int* x_c = new int[sizeof(int) * n]; + int *y_c = new int[sizeof(int) * n]; + + int ret = lapjv_internal(n, cost_ptr, x_c, y_c); + if (ret != 0) + { + cout << "Calculate Wrong!" << endl; + system("pause"); + exit(0); + } + + double opt = 0.0; + + if (n != n_rows) + { + for (int i = 0; i < n; i++) + { + if (x_c[i] >= n_cols) + x_c[i] = -1; + if (y_c[i] >= n_rows) + y_c[i] = -1; + } + for (int i = 0; i < n_rows; i++) + { + rowsol[i] = x_c[i]; + } + for (int i = 0; i < n_cols; i++) + { + colsol[i] = y_c[i]; + } + + if (return_cost) + { + for (int i = 0; i < rowsol.size(); i++) + { + if (rowsol[i] != -1) + { + //cout << i << "\t" << rowsol[i] << "\t" << cost_ptr[i][rowsol[i]] << endl; + opt += cost_ptr[i][rowsol[i]]; + } + } + } + } + else if (return_cost) + { + for (int i = 0; i < rowsol.size(); i++) + { + opt += cost_ptr[i][rowsol[i]]; + } + } + + for (int i = 0; i < n; i++) + { + delete[]cost_ptr[i]; + } + delete[]cost_ptr; + delete[]x_c; + delete[]y_c; + + return opt; +} + +Scalar BYTETracker::get_color(int idx) +{ + idx += 3; + return Scalar(37 * idx % 255, 17 * idx % 255, 29 * idx % 255); +} \ No newline at end of file diff --git a/exps/default/nano.py b/exps/default/nano.py new file mode 100644 index 0000000000000000000000000000000000000000..a622830e2ebb92e10cf56efdfdaf4243c877d6de --- /dev/null +++ b/exps/default/nano.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os +import torch.nn as nn + +from yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 0.33 + self.width = 0.25 + self.scale = (0.5, 1.5) + self.random_size = (10, 20) + self.test_size = (416, 416) + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + self.enable_mixup = False + + def get_model(self, sublinear=False): + + def init_yolo(M): + for m in M.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eps = 1e-3 + m.momentum = 0.03 + if "model" not in self.__dict__: + from yolox.models import YOLOX, YOLOPAFPN, YOLOXHead + in_channels = [256, 512, 1024] + # NANO model use depthwise = True, which is main difference. 
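+            # Editor's note (added comment): depthwise=True builds both the backbone and the head
+            # from depthwise-separable convolutions; together with width = 0.25 above, this is what
+            # makes the nano variant so small.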
+ backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, depthwise=True) + head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, depthwise=True) + self.model = YOLOX(backbone, head) + + self.model.apply(init_yolo) + self.model.head.initialize_biases(1e-2) + return self.model diff --git a/exps/default/yolov3.py b/exps/default/yolov3.py new file mode 100644 index 0000000000000000000000000000000000000000..85d59963d7ccb5868bf010facc9eedc1821f85ea --- /dev/null +++ b/exps/default/yolov3.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os +import torch +import torch.nn as nn + +from yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 1.0 + self.width = 1.0 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + def get_model(self, sublinear=False): + def init_yolo(M): + for m in M.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eps = 1e-3 + m.momentum = 0.03 + if "model" not in self.__dict__: + from yolox.models import YOLOX, YOLOFPN, YOLOXHead + backbone = YOLOFPN() + head = YOLOXHead(self.num_classes, self.width, in_channels=[128, 256, 512], act="lrelu") + self.model = YOLOX(backbone, head) + self.model.apply(init_yolo) + self.model.head.initialize_biases(1e-2) + + return self.model + + def get_data_loader(self, batch_size, is_distributed, no_aug=False): + from data.datasets.cocodataset import COCODataset + from data.datasets.mosaicdetection import MosaicDetection + from data.datasets.data_augment import TrainTransform + from data.datasets.dataloading import YoloBatchSampler, DataLoader, InfiniteSampler + import torch.distributed as dist + + dataset = COCODataset( + data_dir='data/COCO/', + json_file=self.train_ann, + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=50 + ), + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=120 + ), + degrees=self.degrees, + translate=self.translate, + scale=self.scale, + shear=self.shear, + perspective=self.perspective, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0) + else: + sampler = torch.utils.data.RandomSampler(self.dataset) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + input_dimension=self.input_size, + mosaic=not no_aug + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader diff --git a/exps/default/yolox_l.py b/exps/default/yolox_l.py new file mode 100644 index 0000000000000000000000000000000000000000..50833ca38c51fe9ac5e327d7c1c0561fb62249aa --- /dev/null +++ b/exps/default/yolox_l.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. 
+ +import os + +from yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 1.0 + self.width = 1.0 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] diff --git a/exps/default/yolox_m.py b/exps/default/yolox_m.py new file mode 100644 index 0000000000000000000000000000000000000000..9666a31177b9cc1c94978f9867aaceac8ddebce2 --- /dev/null +++ b/exps/default/yolox_m.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os + +from yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 0.67 + self.width = 0.75 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] diff --git a/exps/default/yolox_s.py b/exps/default/yolox_s.py new file mode 100644 index 0000000000000000000000000000000000000000..abb6a8bbbe4fd1c6aff71596621aaeec2a6a15d8 --- /dev/null +++ b/exps/default/yolox_s.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os + +from yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 0.33 + self.width = 0.50 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] diff --git a/exps/default/yolox_tiny.py b/exps/default/yolox_tiny.py new file mode 100644 index 0000000000000000000000000000000000000000..9ea66048cbf68c3b39712dd84f92b800adea413b --- /dev/null +++ b/exps/default/yolox_tiny.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import os + +from yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 0.33 + self.width = 0.375 + self.scale = (0.5, 1.5) + self.random_size = (10, 20) + self.test_size = (416, 416) + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + self.enable_mixup = False diff --git a/exps/default/yolox_x.py b/exps/default/yolox_x.py new file mode 100644 index 0000000000000000000000000000000000000000..ac498a1fb91f597e9362c2b73a9a002cf31445fc --- /dev/null +++ b/exps/default/yolox_x.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. 
+ +import os + +from yolox.exp import Exp as MyExp + + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.depth = 1.33 + self.width = 1.25 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] diff --git a/exps/example/mot/yolox_l_mix_det.py b/exps/example/mot/yolox_l_mix_det.py new file mode 100644 index 0000000000000000000000000000000000000000..e5f5944102b772b7ae4ce4a880ba056fd3118ce9 --- /dev/null +++ b/exps/example/mot/yolox_l_mix_det.py @@ -0,0 +1,138 @@ +# encoding: utf-8 +import os +import random +import torch +import torch.nn as nn +import torch.distributed as dist + +from yolox.exp import Exp as MyExp +from yolox.data import get_yolox_datadir + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.num_classes = 1 + self.depth = 1.0 + self.width = 1.0 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + self.train_ann = "train.json" + self.val_ann = "train.json" + self.input_size = (800, 1440) + self.test_size = (800, 1440) + self.random_size = (18, 32) + self.max_epoch = 80 + self.print_interval = 20 + self.eval_interval = 5 + self.test_conf = 0.001 + self.nmsthre = 0.7 + self.no_aug_epochs = 10 + self.basic_lr_per_img = 0.001 / 64.0 + self.warmup_epochs = 1 + + def get_data_loader(self, batch_size, is_distributed, no_aug=False): + from yolox.data import ( + MOTDataset, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + ) + + dataset = MOTDataset( + data_dir=os.path.join(get_yolox_datadir(), "mix_det"), + json_file=self.train_ann, + name='', + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=500, + ), + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=1000, + ), + degrees=self.degrees, + translate=self.translate, + scale=self.scale, + shear=self.shear, + perspective=self.perspective, + enable_mixup=self.enable_mixup, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + input_dimension=self.input_size, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False): + from yolox.data import MOTDataset, ValTransform + + valdataset = MOTDataset( + data_dir=os.path.join(get_yolox_datadir(), "mot"), + json_file=self.val_ann, + img_size=self.test_size, + name='train', + preproc=ValTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + ), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + 
return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False): + from yolox.evaluators import COCOEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + evaluator = COCOEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + testdev=testdev, + ) + return evaluator diff --git a/exps/example/mot/yolox_m_mix_det.py b/exps/example/mot/yolox_m_mix_det.py new file mode 100644 index 0000000000000000000000000000000000000000..fccb14597eeacdab5d393ae58a2c31bf17d2f2b8 --- /dev/null +++ b/exps/example/mot/yolox_m_mix_det.py @@ -0,0 +1,138 @@ +# encoding: utf-8 +import os +import random +import torch +import torch.nn as nn +import torch.distributed as dist + +from yolox.exp import Exp as MyExp +from yolox.data import get_yolox_datadir + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.num_classes = 1 + self.depth = 0.67 + self.width = 0.75 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + self.train_ann = "train.json" + self.val_ann = "train.json" + self.input_size = (800, 1440) + self.test_size = (800, 1440) + self.random_size = (18, 32) + self.max_epoch = 80 + self.print_interval = 20 + self.eval_interval = 5 + self.test_conf = 0.001 + self.nmsthre = 0.7 + self.no_aug_epochs = 10 + self.basic_lr_per_img = 0.001 / 64.0 + self.warmup_epochs = 1 + + def get_data_loader(self, batch_size, is_distributed, no_aug=False): + from yolox.data import ( + MOTDataset, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + ) + + dataset = MOTDataset( + data_dir=os.path.join(get_yolox_datadir(), "mix_det"), + json_file=self.train_ann, + name='', + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=500, + ), + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=1000, + ), + degrees=self.degrees, + translate=self.translate, + scale=self.scale, + shear=self.shear, + perspective=self.perspective, + enable_mixup=self.enable_mixup, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + input_dimension=self.input_size, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False): + from yolox.data import MOTDataset, ValTransform + + valdataset = MOTDataset( + data_dir=os.path.join(get_yolox_datadir(), "mot"), + json_file=self.val_ann, + img_size=self.test_size, + name='train', + preproc=ValTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + ), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": 
self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False): + from yolox.evaluators import COCOEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + evaluator = COCOEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + testdev=testdev, + ) + return evaluator diff --git a/exps/example/mot/yolox_s_mix_det.py b/exps/example/mot/yolox_s_mix_det.py new file mode 100644 index 0000000000000000000000000000000000000000..95f1810872b9cefd4a4d5c21c45df7b9747a24aa --- /dev/null +++ b/exps/example/mot/yolox_s_mix_det.py @@ -0,0 +1,138 @@ +# encoding: utf-8 +import os +import random +import torch +import torch.nn as nn +import torch.distributed as dist + +from yolox.exp import Exp as MyExp +from yolox.data import get_yolox_datadir + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.num_classes = 1 + self.depth = 0.33 + self.width = 0.50 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + self.train_ann = "train.json" + self.val_ann = "train.json" + self.input_size = (608, 1088) + self.test_size = (608, 1088) + self.random_size = (12, 26) + self.max_epoch = 80 + self.print_interval = 20 + self.eval_interval = 5 + self.test_conf = 0.001 + self.nmsthre = 0.7 + self.no_aug_epochs = 10 + self.basic_lr_per_img = 0.001 / 64.0 + self.warmup_epochs = 1 + + def get_data_loader(self, batch_size, is_distributed, no_aug=False): + from yolox.data import ( + MOTDataset, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + ) + + dataset = MOTDataset( + data_dir=os.path.join(get_yolox_datadir(), "mix_det"), + json_file=self.train_ann, + name='', + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=500, + ), + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=1000, + ), + degrees=self.degrees, + translate=self.translate, + scale=self.scale, + shear=self.shear, + perspective=self.perspective, + enable_mixup=self.enable_mixup, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + input_dimension=self.input_size, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False): + from yolox.data import MOTDataset, ValTransform + + valdataset = MOTDataset( + data_dir=os.path.join(get_yolox_datadir(), "mot"), + json_file=self.val_ann, + img_size=self.test_size, + name='train', + preproc=ValTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + ), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = 
torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False): + from yolox.evaluators import COCOEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + evaluator = COCOEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + testdev=testdev, + ) + return evaluator diff --git a/exps/example/mot/yolox_x_ablation.py b/exps/example/mot/yolox_x_ablation.py new file mode 100644 index 0000000000000000000000000000000000000000..6afb771555419b1166adfdce8489303ae912c9fc --- /dev/null +++ b/exps/example/mot/yolox_x_ablation.py @@ -0,0 +1,138 @@ +# encoding: utf-8 +import os +import random +import torch +import torch.nn as nn +import torch.distributed as dist + +from yolox.exp import Exp as MyExp +from yolox.data import get_yolox_datadir + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.num_classes = 1 + self.depth = 1.33 + self.width = 1.25 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + self.train_ann = "train.json" + self.val_ann = "val_half.json" + self.input_size = (800, 1440) + self.test_size = (800, 1440) + self.random_size = (18, 32) + self.max_epoch = 80 + self.print_interval = 20 + self.eval_interval = 5 + self.test_conf = 0.1 + self.nmsthre = 0.7 + self.no_aug_epochs = 10 + self.basic_lr_per_img = 0.001 / 64.0 + self.warmup_epochs = 1 + + def get_data_loader(self, batch_size, is_distributed, no_aug=False): + from yolox.data import ( + MOTDataset, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + ) + + dataset = MOTDataset( + data_dir=os.path.join(get_yolox_datadir(), "mix_mot_ch"), + json_file=self.train_ann, + name='', + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=500, + ), + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=1000, + ), + degrees=self.degrees, + translate=self.translate, + scale=self.scale, + shear=self.shear, + perspective=self.perspective, + enable_mixup=self.enable_mixup, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + input_dimension=self.input_size, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False): + from yolox.data import MOTDataset, ValTransform + + valdataset = MOTDataset( + data_dir=os.path.join(get_yolox_datadir(), "mot"), + json_file=self.val_ann, + img_size=self.test_size, + 
name='train', + preproc=ValTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + ), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False): + from yolox.evaluators import COCOEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + evaluator = COCOEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + testdev=testdev, + ) + return evaluator diff --git a/exps/example/mot/yolox_x_ch.py b/exps/example/mot/yolox_x_ch.py new file mode 100644 index 0000000000000000000000000000000000000000..0e4765ef92fdfe61c9a28c4a384f156302523e24 --- /dev/null +++ b/exps/example/mot/yolox_x_ch.py @@ -0,0 +1,138 @@ +# encoding: utf-8 +import os +import random +import torch +import torch.nn as nn +import torch.distributed as dist + +from yolox.exp import Exp as MyExp +from yolox.data import get_yolox_datadir + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.num_classes = 1 + self.depth = 1.33 + self.width = 1.25 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + self.train_ann = "train.json" + self.val_ann = "val_half.json" + self.input_size = (800, 1440) + self.test_size = (800, 1440) + self.random_size = (18, 32) + self.max_epoch = 80 + self.print_interval = 20 + self.eval_interval = 5 + self.test_conf = 0.1 + self.nmsthre = 0.7 + self.no_aug_epochs = 10 + self.basic_lr_per_img = 0.001 / 64.0 + self.warmup_epochs = 1 + + def get_data_loader(self, batch_size, is_distributed, no_aug=False): + from yolox.data import ( + MOTDataset, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + ) + + dataset = MOTDataset( + data_dir=os.path.join(get_yolox_datadir(), "ch_all"), + json_file=self.train_ann, + name='', + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=500, + ), + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=1000, + ), + degrees=self.degrees, + translate=self.translate, + scale=self.scale, + shear=self.shear, + perspective=self.perspective, + enable_mixup=self.enable_mixup, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + input_dimension=self.input_size, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False): + from yolox.data 
import MOTDataset, ValTransform + + valdataset = MOTDataset( + data_dir=os.path.join(get_yolox_datadir(), "mot"), + json_file=self.val_ann, + img_size=self.test_size, + name='train', + preproc=ValTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + ), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False): + from yolox.evaluators import COCOEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + evaluator = COCOEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + testdev=testdev, + ) + return evaluator diff --git a/exps/example/mot/yolox_x_mix_det.py b/exps/example/mot/yolox_x_mix_det.py new file mode 100644 index 0000000000000000000000000000000000000000..8013d94558c9e01cfe454778c4bd25231dbec7d8 --- /dev/null +++ b/exps/example/mot/yolox_x_mix_det.py @@ -0,0 +1,138 @@ +# encoding: utf-8 +import os +import random +import torch +import torch.nn as nn +import torch.distributed as dist + +from yolox.exp import Exp as MyExp +from yolox.data import get_yolox_datadir + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.num_classes = 1 + self.depth = 1.33 + self.width = 1.25 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + self.train_ann = "train.json" + self.val_ann = "test.json" # change to train.json when running on training set + self.input_size = (800, 1440) + self.test_size = (800, 1440) + self.random_size = (18, 32) + self.max_epoch = 80 + self.print_interval = 20 + self.eval_interval = 5 + self.test_conf = 0.001 + self.nmsthre = 0.7 + self.no_aug_epochs = 10 + self.basic_lr_per_img = 0.001 / 64.0 + self.warmup_epochs = 1 + + def get_data_loader(self, batch_size, is_distributed, no_aug=False): + from yolox.data import ( + MOTDataset, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + ) + + dataset = MOTDataset( + data_dir=os.path.join(get_yolox_datadir(), "mix_det"), + json_file=self.train_ann, + name='', + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=500, + ), + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=1000, + ), + degrees=self.degrees, + translate=self.translate, + scale=self.scale, + shear=self.shear, + perspective=self.perspective, + enable_mixup=self.enable_mixup, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + input_dimension=self.input_size, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + 
dataloader_kwargs["batch_sampler"] = batch_sampler + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False): + from yolox.data import MOTDataset, ValTransform + + valdataset = MOTDataset( + data_dir=os.path.join(get_yolox_datadir(), "mot"), + json_file=self.val_ann, + img_size=self.test_size, + name='test', # change to train when running on training set + preproc=ValTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + ), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False): + from yolox.evaluators import COCOEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + evaluator = COCOEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + testdev=testdev, + ) + return evaluator diff --git a/exps/example/mot/yolox_x_mix_mot20_ch.py b/exps/example/mot/yolox_x_mix_mot20_ch.py new file mode 100644 index 0000000000000000000000000000000000000000..cff6b4f868607a5f3e2bb365c49acbc401f37bb0 --- /dev/null +++ b/exps/example/mot/yolox_x_mix_mot20_ch.py @@ -0,0 +1,139 @@ +# encoding: utf-8 +import os +import random +import torch +import torch.nn as nn +import torch.distributed as dist + +from yolox.exp import Exp as MyExp +from yolox.data import get_yolox_datadir + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.num_classes = 1 + self.depth = 1.33 + self.width = 1.25 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + self.train_ann = "train.json" + self.val_ann = "test.json" # change to train.json when running on training set + self.input_size = (896, 1600) + self.test_size = (896, 1600) + #self.test_size = (736, 1920) + self.random_size = (20, 36) + self.max_epoch = 80 + self.print_interval = 20 + self.eval_interval = 5 + self.test_conf = 0.001 + self.nmsthre = 0.7 + self.no_aug_epochs = 10 + self.basic_lr_per_img = 0.001 / 64.0 + self.warmup_epochs = 1 + + def get_data_loader(self, batch_size, is_distributed, no_aug=False): + from yolox.data import ( + MOTDataset, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + ) + + dataset = MOTDataset( + data_dir=os.path.join(get_yolox_datadir(), "mix_mot20_ch"), + json_file=self.train_ann, + name='', + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=600, + ), + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=1200, + ), + degrees=self.degrees, + translate=self.translate, + scale=self.scale, + shear=self.shear, + perspective=self.perspective, + enable_mixup=self.enable_mixup, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + 
sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + input_dimension=self.input_size, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False): + from yolox.data import MOTDataset, ValTransform + + valdataset = MOTDataset( + data_dir=os.path.join(get_yolox_datadir(), "MOT20"), + json_file=self.val_ann, + img_size=self.test_size, + name='test', # change to train when running on training set + preproc=ValTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + ), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False): + from yolox.evaluators import COCOEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + evaluator = COCOEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + testdev=testdev, + ) + return evaluator diff --git a/exps/example/mot/yolox_x_mot17_half.py b/exps/example/mot/yolox_x_mot17_half.py new file mode 100644 index 0000000000000000000000000000000000000000..441119b72b8714e78f8f0311933c1c24360fa3d8 --- /dev/null +++ b/exps/example/mot/yolox_x_mot17_half.py @@ -0,0 +1,138 @@ +# encoding: utf-8 +import os +import random +import torch +import torch.nn as nn +import torch.distributed as dist + +from yolox.exp import Exp as MyExp +from yolox.data import get_yolox_datadir + +class Exp(MyExp): + def __init__(self): + super(Exp, self).__init__() + self.num_classes = 1 + self.depth = 1.33 + self.width = 1.25 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + self.train_ann = "train.json" + self.val_ann = "val_half.json" + self.input_size = (800, 1440) + self.test_size = (800, 1440) + self.random_size = (18, 32) + self.max_epoch = 80 + self.print_interval = 20 + self.eval_interval = 5 + self.test_conf = 0.1 + self.nmsthre = 0.7 + self.no_aug_epochs = 10 + self.basic_lr_per_img = 0.001 / 64.0 + self.warmup_epochs = 1 + + def get_data_loader(self, batch_size, is_distributed, no_aug=False): + from yolox.data import ( + MOTDataset, + TrainTransform, + YoloBatchSampler, + DataLoader, + InfiniteSampler, + MosaicDetection, + ) + + dataset = MOTDataset( + data_dir=os.path.join(get_yolox_datadir(), "mot"), + json_file=self.train_ann, + name='train', + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=500, + ), + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=1000, + ), + degrees=self.degrees, + 
translate=self.translate, + scale=self.scale, + shear=self.shear, + perspective=self.perspective, + enable_mixup=self.enable_mixup, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler( + len(self.dataset), seed=self.seed if self.seed else 0 + ) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + input_dimension=self.input_size, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def get_eval_loader(self, batch_size, is_distributed, testdev=False): + from yolox.data import MOTDataset, ValTransform + + valdataset = MOTDataset( + data_dir=os.path.join(get_yolox_datadir(), "mot"), + json_file=self.val_ann, + img_size=self.test_size, + name='train', + preproc=ValTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + ), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False): + from yolox.evaluators import COCOEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + evaluator = COCOEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + testdev=testdev, + ) + return evaluator diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..98e22c1d1c7c18afe58994a1806a15d2fedda563 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,22 @@ +# TODO: Update with exact module version +numpy +torch>=1.7 +opencv_python +loguru +scikit-image +tqdm +torchvision==0.10.0 +Pillow +thop +ninja +tabulate +tensorboard +lap +motmetrics +filterpy +h5py + +# verified versions +onnx==1.8.1 +onnxruntime==1.8.0 +onnx-simplifier==0.3.5 diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000000000000000000000000000000000000..5c0cb3cc460cd11b7ef0adc08f9edf205bcb6914 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,18 @@ +[isort] +line_length = 100 +multi_line_output = 3 +balanced_wrapping = True +known_standard_library = setuptools +known_third_party = tqdm,loguru +known_data_processing = cv2,numpy,scipy,PIL,matplotlib,scikit_image +known_datasets = pycocotools +known_deeplearning = torch,torchvision,caffe2,onnx,apex,timm,thop,torch2trt,tensorrt,openvino,onnxruntime +known_myself = yolox +sections = FUTURE,STDLIB,THIRDPARTY,data_processing,datasets,deeplearning,myself,FIRSTPARTY,LOCALFOLDER +no_lines_before=STDLIB,THIRDPARTY,datasets +default_section = FIRSTPARTY + +[flake8] +max-line-length = 100 +max-complexity = 18 +exclude = __init__.py diff --git a/setup.py b/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..ab3aca97b5fed932e7a40e21f6633f9f6cb84879 --- /dev/null +++ b/setup.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# Copyright (c) Megvii, Inc. 
and its affiliates. All Rights Reserved + +import re +import setuptools +import glob +from os import path +import torch +from torch.utils.cpp_extension import CppExtension + +torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] +assert torch_ver >= [1, 3], "Requires PyTorch >= 1.3" + + +def get_extensions(): + this_dir = path.dirname(path.abspath(__file__)) + extensions_dir = path.join(this_dir, "yolox", "layers", "csrc") + + main_source = path.join(extensions_dir, "vision.cpp") + sources = glob.glob(path.join(extensions_dir, "**", "*.cpp")) + + sources = [main_source] + sources + extension = CppExtension + + extra_compile_args = {"cxx": ["-O3"]} + define_macros = [] + + include_dirs = [extensions_dir] + + ext_modules = [ + extension( + "yolox._C", + sources, + include_dirs=include_dirs, + define_macros=define_macros, + extra_compile_args=extra_compile_args, + ) + ] + + return ext_modules + + +with open("yolox/__init__.py", "r") as f: + version = re.search( + r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', + f.read(), re.MULTILINE + ).group(1) + + +with open("README.md", "r") as f: + long_description = f.read() + + +setuptools.setup( + name="yolox", + version=version, + author="basedet team", + python_requires=">=3.6", + long_description=long_description, + ext_modules=get_extensions(), + classifiers=["Programming Language :: Python :: 3", "Operating System :: OS Independent"], + cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, + packages=setuptools.find_namespace_packages(), +) diff --git a/tools/convert_cityperson_to_coco.py b/tools/convert_cityperson_to_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..e3c2a284fa9b20db329c50e1bb2c47479b680125 --- /dev/null +++ b/tools/convert_cityperson_to_coco.py @@ -0,0 +1,59 @@ +import os +import numpy as np +import json +from PIL import Image + +DATA_PATH = 'datasets/Cityscapes/' +DATA_FILE_PATH = 'datasets/data_path/citypersons.train' +OUT_PATH = DATA_PATH + 'annotations/' + +def load_paths(data_path): + with open(data_path, 'r') as file: + img_files = file.readlines() + img_files = [x.replace('\n', '') for x in img_files] + img_files = list(filter(lambda x: len(x) > 0, img_files)) + label_files = [x.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt') for x in img_files] + return img_files, label_files + +if __name__ == '__main__': + if not os.path.exists(OUT_PATH): + os.mkdir(OUT_PATH) + + out_path = OUT_PATH + 'train.json' + out = {'images': [], 'annotations': [], 'categories': [{'id': 1, 'name': 'person'}]} + img_paths, label_paths = load_paths(DATA_FILE_PATH) + image_cnt = 0 + ann_cnt = 0 + video_cnt = 0 + for img_path, label_path in zip(img_paths, label_paths): + image_cnt += 1 + im = Image.open(img_path) + image_info = {'file_name': img_path, + 'id': image_cnt, + 'height': im.size[1], + 'width': im.size[0]} + out['images'].append(image_info) + # Load labels + if os.path.isfile(label_path): + labels0 = np.loadtxt(label_path, dtype=np.float32).reshape(-1, 6) + # Normalized xywh to pixel xyxy format + labels = labels0.copy() + labels[:, 2] = image_info['width'] * (labels0[:, 2] - labels0[:, 4] / 2) + labels[:, 3] = image_info['height'] * (labels0[:, 3] - labels0[:, 5] / 2) + labels[:, 4] = image_info['width'] * labels0[:, 4] + labels[:, 5] = image_info['height'] * labels0[:, 5] + else: + labels = np.array([]) + for i in range(len(labels)): + ann_cnt += 1 + fbox = labels[i, 2:6].tolist() + ann = {'id': ann_cnt, + 'category_id': 1, + 'image_id': image_cnt, + 
'track_id': -1, + 'bbox': fbox, + 'area': fbox[2] * fbox[3], + 'iscrowd': 0} + out['annotations'].append(ann) + print('loaded train for {} images and {} samples'.format(len(out['images']), len(out['annotations']))) + json.dump(out, open(out_path, 'w')) \ No newline at end of file diff --git a/tools/convert_crowdhuman_to_coco.py b/tools/convert_crowdhuman_to_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..62e0b66788f7625e2fbb5ba420794abf1125aa84 --- /dev/null +++ b/tools/convert_crowdhuman_to_coco.py @@ -0,0 +1,57 @@ +import os +import numpy as np +import json +from PIL import Image + +DATA_PATH = 'datasets/crowdhuman/' +OUT_PATH = DATA_PATH + 'annotations/' +SPLITS = ['val', 'train'] +DEBUG = False + +def load_func(fpath): + print('fpath', fpath) + assert os.path.exists(fpath) + with open(fpath,'r') as fid: + lines = fid.readlines() + records =[json.loads(line.strip('\n')) for line in lines] + return records + +if __name__ == '__main__': + if not os.path.exists(OUT_PATH): + os.mkdir(OUT_PATH) + for split in SPLITS: + data_path = DATA_PATH + split + out_path = OUT_PATH + '{}.json'.format(split) + out = {'images': [], 'annotations': [], 'categories': [{'id': 1, 'name': 'person'}]} + ann_path = DATA_PATH + 'annotation_{}.odgt'.format(split) + anns_data = load_func(ann_path) + image_cnt = 0 + ann_cnt = 0 + video_cnt = 0 + for ann_data in anns_data: + image_cnt += 1 + file_path = DATA_PATH + 'CrowdHuman_{}/'.format(split) + '{}.jpg'.format(ann_data['ID']) + im = Image.open(file_path) + image_info = {'file_name': '{}.jpg'.format(ann_data['ID']), + 'id': image_cnt, + 'height': im.size[1], + 'width': im.size[0]} + out['images'].append(image_info) + if split != 'test': + anns = ann_data['gtboxes'] + for i in range(len(anns)): + ann_cnt += 1 + fbox = anns[i]['fbox'] + ann = {'id': ann_cnt, + 'category_id': 1, + 'image_id': image_cnt, + 'track_id': -1, + 'bbox_vis': anns[i]['vbox'], + 'bbox': fbox, + 'area': fbox[2] * fbox[3], + 'iscrowd': 1 if 'extra' in anns[i] and \ + 'ignore' in anns[i]['extra'] and \ + anns[i]['extra']['ignore'] == 1 else 0} + out['annotations'].append(ann) + print('loaded {} for {} images and {} samples'.format(split, len(out['images']), len(out['annotations']))) + json.dump(out, open(out_path, 'w')) \ No newline at end of file diff --git a/tools/convert_ethz_to_coco.py b/tools/convert_ethz_to_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..ceb32810dd0c6970f93d819bcca886fd42451a61 --- /dev/null +++ b/tools/convert_ethz_to_coco.py @@ -0,0 +1,59 @@ +import os +import numpy as np +import json +from PIL import Image + +DATA_PATH = 'datasets/ETHZ/' +DATA_FILE_PATH = 'datasets/data_path/eth.train' +OUT_PATH = DATA_PATH + 'annotations/' + +def load_paths(data_path): + with open(data_path, 'r') as file: + img_files = file.readlines() + img_files = [x.replace('\n', '') for x in img_files] + img_files = list(filter(lambda x: len(x) > 0, img_files)) + label_files = [x.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt') for x in img_files] + return img_files, label_files + +if __name__ == '__main__': + if not os.path.exists(OUT_PATH): + os.mkdir(OUT_PATH) + + out_path = OUT_PATH + 'train.json' + out = {'images': [], 'annotations': [], 'categories': [{'id': 1, 'name': 'person'}]} + img_paths, label_paths = load_paths(DATA_FILE_PATH) + image_cnt = 0 + ann_cnt = 0 + video_cnt = 0 + for img_path, label_path in zip(img_paths, label_paths): + image_cnt += 1 + im = Image.open(img_path) + image_info = 
{'file_name': img_path, + 'id': image_cnt, + 'height': im.size[1], + 'width': im.size[0]} + out['images'].append(image_info) + # Load labels + if os.path.isfile(label_path): + labels0 = np.loadtxt(label_path, dtype=np.float32).reshape(-1, 6) + # Normalized xywh to pixel xyxy format + labels = labels0.copy() + labels[:, 2] = image_info['width'] * (labels0[:, 2] - labels0[:, 4] / 2) + labels[:, 3] = image_info['height'] * (labels0[:, 3] - labels0[:, 5] / 2) + labels[:, 4] = image_info['width'] * labels0[:, 4] + labels[:, 5] = image_info['height'] * labels0[:, 5] + else: + labels = np.array([]) + for i in range(len(labels)): + ann_cnt += 1 + fbox = labels[i, 2:6].tolist() + ann = {'id': ann_cnt, + 'category_id': 1, + 'image_id': image_cnt, + 'track_id': -1, + 'bbox': fbox, + 'area': fbox[2] * fbox[3], + 'iscrowd': 0} + out['annotations'].append(ann) + print('loaded train for {} images and {} samples'.format(len(out['images']), len(out['annotations']))) + json.dump(out, open(out_path, 'w')) \ No newline at end of file diff --git a/tools/convert_mot17_to_coco.py b/tools/convert_mot17_to_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..403798c14613e82523557e9730464b64e6403a86 --- /dev/null +++ b/tools/convert_mot17_to_coco.py @@ -0,0 +1,133 @@ +import os +import numpy as np +import json +import cv2 + + +# Use the same script for MOT16 +DATA_PATH = 'datasets/mot' +OUT_PATH = os.path.join(DATA_PATH, 'annotations') +SPLITS = ['train_half', 'val_half', 'train', 'test'] # --> split training data to train_half and val_half. +HALF_VIDEO = True +CREATE_SPLITTED_ANN = True +CREATE_SPLITTED_DET = True + + +if __name__ == '__main__': + + if not os.path.exists(OUT_PATH): + os.makedirs(OUT_PATH) + + for split in SPLITS: + if split == "test": + data_path = os.path.join(DATA_PATH, 'test') + else: + data_path = os.path.join(DATA_PATH, 'train') + out_path = os.path.join(OUT_PATH, '{}.json'.format(split)) + out = {'images': [], 'annotations': [], 'videos': [], + 'categories': [{'id': 1, 'name': 'pedestrian'}]} + seqs = os.listdir(data_path) + image_cnt = 0 + ann_cnt = 0 + video_cnt = 0 + tid_curr = 0 + tid_last = -1 + for seq in sorted(seqs): + if '.DS_Store' in seq: + continue + if 'mot' in DATA_PATH and (split != 'test' and not ('FRCNN' in seq)): + continue + video_cnt += 1 # video sequence number. + out['videos'].append({'id': video_cnt, 'file_name': seq}) + seq_path = os.path.join(data_path, seq) + img_path = os.path.join(seq_path, 'img1') + ann_path = os.path.join(seq_path, 'gt/gt.txt') + images = os.listdir(img_path) + num_images = len([image for image in images if 'jpg' in image]) # half and half + + if HALF_VIDEO and ('half' in split): + image_range = [0, num_images // 2] if 'train' in split else \ + [num_images // 2 + 1, num_images - 1] + else: + image_range = [0, num_images - 1] + + for i in range(num_images): + if i < image_range[0] or i > image_range[1]: + continue + img = cv2.imread(os.path.join(data_path, '{}/img1/{:06d}.jpg'.format(seq, i + 1))) + height, width = img.shape[:2] + image_info = {'file_name': '{}/img1/{:06d}.jpg'.format(seq, i + 1), # image name. + 'id': image_cnt + i + 1, # image number in the entire training set. + 'frame_id': i + 1 - image_range[0], # image number in the video sequence, starting from 1. + 'prev_image_id': image_cnt + i if i > 0 else -1, # image number in the entire training set. 
+ 'next_image_id': image_cnt + i + 2 if i < num_images - 1 else -1, + 'video_id': video_cnt, + 'height': height, 'width': width} + out['images'].append(image_info) + print('{}: {} images'.format(seq, num_images)) + if split != 'test': + det_path = os.path.join(seq_path, 'det/det.txt') + anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',') + dets = np.loadtxt(det_path, dtype=np.float32, delimiter=',') + if CREATE_SPLITTED_ANN and ('half' in split): + anns_out = np.array([anns[i] for i in range(anns.shape[0]) + if int(anns[i][0]) - 1 >= image_range[0] and + int(anns[i][0]) - 1 <= image_range[1]], np.float32) + anns_out[:, 0] -= image_range[0] + gt_out = os.path.join(seq_path, 'gt/gt_{}.txt'.format(split)) + fout = open(gt_out, 'w') + for o in anns_out: + fout.write('{:d},{:d},{:d},{:d},{:d},{:d},{:d},{:d},{:.6f}\n'.format( + int(o[0]), int(o[1]), int(o[2]), int(o[3]), int(o[4]), int(o[5]), + int(o[6]), int(o[7]), o[8])) + fout.close() + if CREATE_SPLITTED_DET and ('half' in split): + dets_out = np.array([dets[i] for i in range(dets.shape[0]) + if int(dets[i][0]) - 1 >= image_range[0] and + int(dets[i][0]) - 1 <= image_range[1]], np.float32) + dets_out[:, 0] -= image_range[0] + det_out = os.path.join(seq_path, 'det/det_{}.txt'.format(split)) + dout = open(det_out, 'w') + for o in dets_out: + dout.write('{:d},{:d},{:.1f},{:.1f},{:.1f},{:.1f},{:.6f}\n'.format( + int(o[0]), int(o[1]), float(o[2]), float(o[3]), float(o[4]), float(o[5]), + float(o[6]))) + dout.close() + + print('{} ann images'.format(int(anns[:, 0].max()))) + for i in range(anns.shape[0]): + frame_id = int(anns[i][0]) + if frame_id - 1 < image_range[0] or frame_id - 1 > image_range[1]: + continue + track_id = int(anns[i][1]) + cat_id = int(anns[i][7]) + ann_cnt += 1 + if not ('15' in DATA_PATH): + #if not (float(anns[i][8]) >= 0.25): # visibility. + #continue + if not (int(anns[i][6]) == 1): # whether ignore. + continue + if int(anns[i][7]) in [3, 4, 5, 6, 9, 10, 11]: # Non-person + continue + if int(anns[i][7]) in [2, 7, 8, 12]: # Ignored person + category_id = -1 + else: + category_id = 1 # pedestrian(non-static) + if not track_id == tid_last: + tid_curr += 1 + tid_last = track_id + else: + category_id = 1 + ann = {'id': ann_cnt, + 'category_id': category_id, + 'image_id': image_cnt + frame_id, + 'track_id': tid_curr, + 'bbox': anns[i][2:6].tolist(), + 'conf': float(anns[i][6]), + 'iscrowd': 0, + 'area': float(anns[i][4] * anns[i][5])} + out['annotations'].append(ann) + image_cnt += num_images + print(tid_curr, tid_last) + print('loaded {} for {} images and {} samples'.format(split, len(out['images']), len(out['annotations']))) + json.dump(out, open(out_path, 'w')) \ No newline at end of file diff --git a/tools/convert_mot20_to_coco.py b/tools/convert_mot20_to_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..67bd9b55b94dc8511b8542d0391d73681238c8b7 --- /dev/null +++ b/tools/convert_mot20_to_coco.py @@ -0,0 +1,132 @@ +import os +import numpy as np +import json +import cv2 + + +# Use the same script for MOT16 +DATA_PATH = 'datasets/MOT20' +OUT_PATH = os.path.join(DATA_PATH, 'annotations') +SPLITS = ['train_half', 'val_half', 'train', 'test'] # --> split training data to train_half and val_half. 
+HALF_VIDEO = True +CREATE_SPLITTED_ANN = True +CREATE_SPLITTED_DET = True + + +if __name__ == '__main__': + + if not os.path.exists(OUT_PATH): + os.makedirs(OUT_PATH) + + for split in SPLITS: + if split == "test": + data_path = os.path.join(DATA_PATH, 'test') + else: + data_path = os.path.join(DATA_PATH, 'train') + out_path = os.path.join(OUT_PATH, '{}.json'.format(split)) + out = {'images': [], 'annotations': [], 'videos': [], + 'categories': [{'id': 1, 'name': 'pedestrian'}]} + seqs = os.listdir(data_path) + image_cnt = 0 + ann_cnt = 0 + video_cnt = 0 + tid_curr = 0 + tid_last = -1 + for seq in sorted(seqs): + if '.DS_Store' in seq: + continue + video_cnt += 1 # video sequence number. + out['videos'].append({'id': video_cnt, 'file_name': seq}) + seq_path = os.path.join(data_path, seq) + img_path = os.path.join(seq_path, 'img1') + ann_path = os.path.join(seq_path, 'gt/gt.txt') + images = os.listdir(img_path) + num_images = len([image for image in images if 'jpg' in image]) # half and half + + if HALF_VIDEO and ('half' in split): + image_range = [0, num_images // 2] if 'train' in split else \ + [num_images // 2 + 1, num_images - 1] + else: + image_range = [0, num_images - 1] + + for i in range(num_images): + if i < image_range[0] or i > image_range[1]: + continue + img = cv2.imread(os.path.join(data_path, '{}/img1/{:06d}.jpg'.format(seq, i + 1))) + height, width = img.shape[:2] + image_info = {'file_name': '{}/img1/{:06d}.jpg'.format(seq, i + 1), # image name. + 'id': image_cnt + i + 1, # image number in the entire training set. + 'frame_id': i + 1 - image_range[0], # image number in the video sequence, starting from 1. + 'prev_image_id': image_cnt + i if i > 0 else -1, # image number in the entire training set. + 'next_image_id': image_cnt + i + 2 if i < num_images - 1 else -1, + 'video_id': video_cnt, + 'height': height, 'width': width} + out['images'].append(image_info) + print('{}: {} images'.format(seq, num_images)) + if split != 'test': + det_path = os.path.join(seq_path, 'det/det.txt') + anns = np.loadtxt(ann_path, dtype=np.float32, delimiter=',') + dets = np.loadtxt(det_path, dtype=np.float32, delimiter=',') + if CREATE_SPLITTED_ANN and ('half' in split): + anns_out = np.array([anns[i] for i in range(anns.shape[0]) + if int(anns[i][0]) - 1 >= image_range[0] and + int(anns[i][0]) - 1 <= image_range[1]], np.float32) + anns_out[:, 0] -= image_range[0] + gt_out = os.path.join(seq_path, 'gt/gt_{}.txt'.format(split)) + fout = open(gt_out, 'w') + for o in anns_out: + fout.write('{:d},{:d},{:d},{:d},{:d},{:d},{:d},{:d},{:.6f}\n'.format( + int(o[0]), int(o[1]), int(o[2]), int(o[3]), int(o[4]), int(o[5]), + int(o[6]), int(o[7]), o[8])) + fout.close() + if CREATE_SPLITTED_DET and ('half' in split): + dets_out = np.array([dets[i] for i in range(dets.shape[0]) + if int(dets[i][0]) - 1 >= image_range[0] and + int(dets[i][0]) - 1 <= image_range[1]], np.float32) + dets_out[:, 0] -= image_range[0] + det_out = os.path.join(seq_path, 'det/det_{}.txt'.format(split)) + dout = open(det_out, 'w') + for o in dets_out: + dout.write('{:d},{:d},{:.1f},{:.1f},{:.1f},{:.1f},{:.6f}\n'.format( + int(o[0]), int(o[1]), float(o[2]), float(o[3]), float(o[4]), float(o[5]), + float(o[6]))) + dout.close() + + print('{} ann images'.format(int(anns[:, 0].max()))) + for i in range(anns.shape[0]): + frame_id = int(anns[i][0]) + if frame_id - 1 < image_range[0] or frame_id - 1 > image_range[1]: + continue + track_id = int(anns[i][1]) + cat_id = int(anns[i][7]) + ann_cnt += 1 + if not ('15' in DATA_PATH): + #if not 
(float(anns[i][8]) >= 0.25): # visibility. + #continue + if not (int(anns[i][6]) == 1): # whether ignore. + continue + if int(anns[i][7]) in [3, 4, 5, 6, 9, 10, 11]: # Non-person + continue + if int(anns[i][7]) in [2, 7, 8, 12]: # Ignored person + #category_id = -1 + continue + else: + category_id = 1 # pedestrian(non-static) + if not track_id == tid_last: + tid_curr += 1 + tid_last = track_id + else: + category_id = 1 + ann = {'id': ann_cnt, + 'category_id': category_id, + 'image_id': image_cnt + frame_id, + 'track_id': tid_curr, + 'bbox': anns[i][2:6].tolist(), + 'conf': float(anns[i][6]), + 'iscrowd': 0, + 'area': float(anns[i][4] * anns[i][5])} + out['annotations'].append(ann) + image_cnt += num_images + print(tid_curr, tid_last) + print('loaded {} for {} images and {} samples'.format(split, len(out['images']), len(out['annotations']))) + json.dump(out, open(out_path, 'w')) \ No newline at end of file diff --git a/tools/convert_video.py b/tools/convert_video.py new file mode 100644 index 0000000000000000000000000000000000000000..16e8aaaf117a95e72d8f85b6376a5e2b8eaaf300 --- /dev/null +++ b/tools/convert_video.py @@ -0,0 +1,26 @@ +import cv2 + +def convert_video(video_path): + cap = cv2.VideoCapture(video_path) + width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) # float + height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) # float + fps = cap.get(cv2.CAP_PROP_FPS) + video_name = video_path.split('/')[-1].split('.')[0] + save_name = video_name + '_converted' + save_path = video_path.replace(video_name, save_name) + vid_writer = cv2.VideoWriter( + save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height)) + ) + while True: + ret_val, frame = cap.read() + if ret_val: + vid_writer.write(frame) + ch = cv2.waitKey(1) + if ch == 27 or ch == ord("q") or ch == ord("Q"): + break + else: + break + +if __name__ == "__main__": + video_path = 'videos/palace.mp4' + convert_video(video_path) \ No newline at end of file diff --git a/tools/demo_track.py b/tools/demo_track.py new file mode 100644 index 0000000000000000000000000000000000000000..1b3940558f11ed9c1d4678e96bd6bcb77b4b3d7a --- /dev/null +++ b/tools/demo_track.py @@ -0,0 +1,349 @@ +from loguru import logger + +import cv2 + +import torch + +from yolox.data.data_augment import preproc +from yolox.exp import get_exp +from yolox.utils import fuse_model, get_model_info, postprocess, vis +from yolox.utils.visualize import plot_tracking +from yolox.tracker.byte_tracker import BYTETracker +from yolox.tracking_utils.timer import Timer + +import argparse +import os +import time + +IMAGE_EXT = [".jpg", ".jpeg", ".webp", ".bmp", ".png"] + + +def make_parser(): + parser = argparse.ArgumentParser("ByteTrack Demo!") + parser.add_argument( + "demo", default="image", help="demo type, eg. 
image, video and webcam"
+    )
+    parser.add_argument("-expn", "--experiment-name", type=str, default=None)
+    parser.add_argument("-n", "--name", type=str, default=None, help="model name")
+
+    parser.add_argument(
+        #"--path", default="./datasets/mot/train/MOT17-05-FRCNN/img1", help="path to images or video"
+        "--path", default="./videos/palace.mp4", help="path to images or video"
+    )
+    parser.add_argument("--camid", type=int, default=0, help="webcam demo camera id")
+    parser.add_argument(
+        "--save_result",
+        action="store_true",
+        help="whether to save the inference result of image/video",
+    )
+
+    # exp file
+    parser.add_argument(
+        "-f",
+        "--exp_file",
+        default=None,
+        type=str,
+        help="please input your experiment description file",
+    )
+    parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt for eval")
+    parser.add_argument(
+        "--device",
+        default="gpu",
+        type=str,
+        help="device to run our model, can either be cpu or gpu",
+    )
+    parser.add_argument("--conf", default=None, type=float, help="test conf")
+    parser.add_argument("--nms", default=None, type=float, help="test nms threshold")
+    parser.add_argument("--tsize", default=None, type=int, help="test img size")
+    parser.add_argument(
+        "--fp16",
+        dest="fp16",
+        default=False,
+        action="store_true",
+        help="Adopt mixed precision evaluation.",
+    )
+    parser.add_argument(
+        "--fuse",
+        dest="fuse",
+        default=False,
+        action="store_true",
+        help="Fuse conv and bn for testing.",
+    )
+    parser.add_argument(
+        "--trt",
+        dest="trt",
+        default=False,
+        action="store_true",
+        help="Using TensorRT model for testing.",
+    )
+    # tracking args
+    parser.add_argument("--track_thresh", type=float, default=0.5, help="tracking confidence threshold")
+    parser.add_argument("--track_buffer", type=int, default=30, help="number of frames to keep lost tracks")
+    parser.add_argument("--match_thresh", type=float, default=0.8, help="matching threshold for tracking")
+    parser.add_argument('--min-box-area', type=float, default=10, help='filter out tiny boxes')
+    parser.add_argument("--mot20", dest="mot20", default=False, action="store_true", help="test mot20.")
+    return parser
+
+
+def get_image_list(path):
+    image_names = []
+    for maindir, subdir, file_name_list in os.walk(path):
+        for filename in file_name_list:
+            apath = os.path.join(maindir, filename)
+            ext = os.path.splitext(apath)[1]
+            if ext in IMAGE_EXT:
+                image_names.append(apath)
+    return image_names
+
+
+def write_results(filename, results):
+    save_format = '{frame},{id},{x1},{y1},{w},{h},{s},-1,-1,-1\n'
+    with open(filename, 'w') as f:
+        for frame_id, tlwhs, track_ids, scores in results:
+            for tlwh, track_id, score in zip(tlwhs, track_ids, scores):
+                if track_id < 0:
+                    continue
+                x1, y1, w, h = tlwh
+                line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1), h=round(h, 1), s=round(score, 2))
+                f.write(line)
+    logger.info('save results to {}'.format(filename))
+
+
+class Predictor(object):
+    def __init__(
+        self,
+        model,
+        exp,
+        trt_file=None,
+        decoder=None,
+        device="cpu",
+        fp16=False
+    ):
+        self.model = model
+        self.decoder = decoder
+        self.num_classes = exp.num_classes
+        self.confthre = exp.test_conf
+        self.nmsthre = exp.nmsthre
+        self.test_size = exp.test_size
+        self.device = device
+        self.fp16 = fp16
+        if trt_file is not None:
+            from torch2trt import TRTModule
+
+            model_trt = TRTModule()
+            model_trt.load_state_dict(torch.load(trt_file))
+
+            x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda()
+            self.model(x)
+            self.model = 
model_trt + self.rgb_means = (0.485, 0.456, 0.406) + self.std = (0.229, 0.224, 0.225) + + def inference(self, img, timer): + img_info = {"id": 0} + if isinstance(img, str): + img_info["file_name"] = os.path.basename(img) + img = cv2.imread(img) + else: + img_info["file_name"] = None + + height, width = img.shape[:2] + img_info["height"] = height + img_info["width"] = width + img_info["raw_img"] = img + + img, ratio = preproc(img, self.test_size, self.rgb_means, self.std) + img_info["ratio"] = ratio + img = torch.from_numpy(img).unsqueeze(0) + img = img.float() + if self.device == "gpu": + img = img.cuda() + if self.fp16: + img = img.half() # to FP16 + + with torch.no_grad(): + timer.tic() + outputs = self.model(img) + if self.decoder is not None: + outputs = self.decoder(outputs, dtype=outputs.type()) + outputs = postprocess( + outputs, self.num_classes, self.confthre, self.nmsthre + ) + #logger.info("Infer time: {:.4f}s".format(time.time() - t0)) + return outputs, img_info + + +def image_demo(predictor, vis_folder, path, current_time, save_result): + if os.path.isdir(path): + files = get_image_list(path) + else: + files = [path] + files.sort() + tracker = BYTETracker(args, frame_rate=30) + timer = Timer() + frame_id = 0 + results = [] + for image_name in files: + if frame_id % 20 == 0: + logger.info('Processing frame {} ({:.2f} fps)'.format(frame_id, 1. / max(1e-5, timer.average_time))) + outputs, img_info = predictor.inference(image_name, timer) + online_targets = tracker.update(outputs[0], [img_info['height'], img_info['width']], exp.test_size) + online_tlwhs = [] + online_ids = [] + online_scores = [] + for t in online_targets: + tlwh = t.tlwh + tid = t.track_id + vertical = tlwh[2] / tlwh[3] > 1.6 + if tlwh[2] * tlwh[3] > args.min_box_area and not vertical: + online_tlwhs.append(tlwh) + online_ids.append(tid) + online_scores.append(t.score) + timer.toc() + # save results + results.append((frame_id + 1, online_tlwhs, online_ids, online_scores)) + online_im = plot_tracking(img_info['raw_img'], online_tlwhs, online_ids, frame_id=frame_id + 1, + fps=1. / timer.average_time) + + #result_image = predictor.visual(outputs[0], img_info, predictor.confthre) + if save_result: + save_folder = os.path.join( + vis_folder, time.strftime("%Y_%m_%d_%H_%M_%S", current_time) + ) + os.makedirs(save_folder, exist_ok=True) + save_file_name = os.path.join(save_folder, os.path.basename(image_name)) + cv2.imwrite(save_file_name, online_im) + ch = cv2.waitKey(0) + frame_id += 1 + if ch == 27 or ch == ord("q") or ch == ord("Q"): + break + #write_results(result_filename, results) + + +def imageflow_demo(predictor, vis_folder, current_time, args): + cap = cv2.VideoCapture(args.path if args.demo == "video" else args.camid) + width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) # float + height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) # float + fps = cap.get(cv2.CAP_PROP_FPS) + save_folder = os.path.join( + vis_folder, time.strftime("%Y_%m_%d_%H_%M_%S", current_time) + ) + os.makedirs(save_folder, exist_ok=True) + if args.demo == "video": + save_path = os.path.join(save_folder, args.path.split("/")[-1]) + else: + save_path = os.path.join(save_folder, "camera.mp4") + logger.info(f"video save_path is {save_path}") + vid_writer = cv2.VideoWriter( + save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height)) + ) + tracker = BYTETracker(args, frame_rate=30) + timer = Timer() + frame_id = 0 + results = [] + while True: + if frame_id % 20 == 0: + logger.info('Processing frame {} ({:.2f} fps)'.format(frame_id, 1. 
/ max(1e-5, timer.average_time))) + ret_val, frame = cap.read() + if ret_val: + outputs, img_info = predictor.inference(frame, timer) + online_targets = tracker.update(outputs[0], [img_info['height'], img_info['width']], exp.test_size) + online_tlwhs = [] + online_ids = [] + online_scores = [] + for t in online_targets: + tlwh = t.tlwh + tid = t.track_id + vertical = tlwh[2] / tlwh[3] > 1.6 + if tlwh[2] * tlwh[3] > args.min_box_area and not vertical: + online_tlwhs.append(tlwh) + online_ids.append(tid) + online_scores.append(t.score) + timer.toc() + results.append((frame_id + 1, online_tlwhs, online_ids, online_scores)) + online_im = plot_tracking(img_info['raw_img'], online_tlwhs, online_ids, frame_id=frame_id + 1, + fps=1. / timer.average_time) + if args.save_result: + vid_writer.write(online_im) + ch = cv2.waitKey(1) + if ch == 27 or ch == ord("q") or ch == ord("Q"): + break + else: + break + frame_id += 1 + + +def main(exp, args): + if not args.experiment_name: + args.experiment_name = exp.exp_name + + file_name = os.path.join(exp.output_dir, args.experiment_name) + os.makedirs(file_name, exist_ok=True) + + if args.save_result: + vis_folder = os.path.join(file_name, "track_vis") + os.makedirs(vis_folder, exist_ok=True) + + if args.trt: + args.device = "gpu" + + logger.info("Args: {}".format(args)) + + if args.conf is not None: + exp.test_conf = args.conf + if args.nms is not None: + exp.nmsthre = args.nms + if args.tsize is not None: + exp.test_size = (args.tsize, args.tsize) + + model = exp.get_model() + logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size))) + + if args.device == "gpu": + model.cuda() + model.eval() + + if not args.trt: + if args.ckpt is None: + ckpt_file = os.path.join(file_name, "best_ckpt.pth.tar") + else: + ckpt_file = args.ckpt + logger.info("loading checkpoint") + ckpt = torch.load(ckpt_file, map_location="cpu") + # load the model state dict + model.load_state_dict(ckpt["model"]) + logger.info("loaded checkpoint done.") + + if args.fuse: + logger.info("\tFusing model...") + model = fuse_model(model) + + if args.fp16: + model = model.half() # to FP16 + + if args.trt: + assert not args.fuse, "TensorRT model is not support model fusing!" + trt_file = os.path.join(file_name, "model_trt.pth") + assert os.path.exists( + trt_file + ), "TensorRT model is not found!\n Run python3 tools/trt.py first!" 
+        model.head.decode_in_inference = False
+        decoder = model.head.decode_outputs
+        logger.info("Using TensorRT to inference")
+    else:
+        trt_file = None
+        decoder = None
+
+    predictor = Predictor(model, exp, trt_file, decoder, args.device, args.fp16)
+    current_time = time.localtime()
+    if args.demo == "image":
+        image_demo(predictor, vis_folder, args.path, current_time, args.save_result)
+    elif args.demo == "video" or args.demo == "webcam":
+        imageflow_demo(predictor, vis_folder, current_time, args)
+
+
+if __name__ == "__main__":
+    args = make_parser().parse_args()
+    exp = get_exp(args.exp_file, args.name)
+
+    main(exp, args)
diff --git a/tools/export_onnx.py b/tools/export_onnx.py
new file mode 100644
index 0000000000000000000000000000000000000000..71b16fe3b3bd4e4a2b1315d2c2185ad2195dc37b
--- /dev/null
+++ b/tools/export_onnx.py
@@ -0,0 +1,102 @@
+from loguru import logger
+
+import torch
+from torch import nn
+
+from yolox.exp import get_exp
+from yolox.models.network_blocks import SiLU
+from yolox.utils import replace_module
+
+import argparse
+import os
+
+
+def make_parser():
+    parser = argparse.ArgumentParser("YOLOX onnx deploy")
+    parser.add_argument(
+        "--output-name", type=str, default="bytetrack_s.onnx", help="output name of models"
+    )
+    parser.add_argument(
+        "--input", default="images", type=str, help="input node name of onnx model"
+    )
+    parser.add_argument(
+        "--output", default="output", type=str, help="output node name of onnx model"
+    )
+    parser.add_argument(
+        "-o", "--opset", default=11, type=int, help="onnx opset version"
+    )
+    parser.add_argument("--no-onnxsim", action="store_true", help="use onnxsim or not")
+    parser.add_argument(
+        "-f",
+        "--exp_file",
+        default=None,
+        type=str,
+        help="experiment description file",
+    )
+    parser.add_argument("-expn", "--experiment-name", type=str, default=None)
+    parser.add_argument("-n", "--name", type=str, default=None, help="model name")
+    parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt path")
+    parser.add_argument(
+        "opts",
+        help="Modify config options using the command-line",
+        default=None,
+        nargs=argparse.REMAINDER,
+    )
+
+    return parser
+
+
+@logger.catch
+def main():
+    args = make_parser().parse_args()
+    logger.info("args value: {}".format(args))
+    exp = get_exp(args.exp_file, args.name)
+    exp.merge(args.opts)
+
+    if not args.experiment_name:
+        args.experiment_name = exp.exp_name
+
+    model = exp.get_model()
+    if args.ckpt is None:
+        file_name = os.path.join(exp.output_dir, args.experiment_name)
+        ckpt_file = os.path.join(file_name, "best_ckpt.pth.tar")
+    else:
+        ckpt_file = args.ckpt
+
+    # load the model state dict
+    ckpt = torch.load(ckpt_file, map_location="cpu")
+
+    model.eval()
+    if "model" in ckpt:
+        ckpt = ckpt["model"]
+    model.load_state_dict(ckpt)
+    model = replace_module(model, nn.SiLU, SiLU)
+    model.head.decode_in_inference = False
+
+    logger.info("loading checkpoint done.")
+    dummy_input = torch.randn(1, 3, exp.test_size[0], exp.test_size[1])
+    torch.onnx._export(
+        model,
+        dummy_input,
+        args.output_name,
+        input_names=[args.input],
+        output_names=[args.output],
+        opset_version=args.opset,
+    )
+    logger.info("generated onnx model named {}".format(args.output_name))
+
+    if not args.no_onnxsim:
+        import onnx
+
+        from onnxsim import simplify
+
+        # use onnx-simplifier to remove redundant nodes from the model.
+ onnx_model = onnx.load(args.output_name) + model_simp, check = simplify(onnx_model) + assert check, "Simplified ONNX model could not be validated" + onnx.save(model_simp, args.output_name) + logger.info("generated simplified onnx model named {}".format(args.output_name)) + + +if __name__ == "__main__": + main() diff --git a/tools/interpolation.py b/tools/interpolation.py new file mode 100644 index 0000000000000000000000000000000000000000..b5e4b3dbc1d4af2381b8f508ffd4169825c6f81e --- /dev/null +++ b/tools/interpolation.py @@ -0,0 +1,143 @@ +import numpy as np +import os +import glob +import motmetrics as mm + +from yolox.evaluators.evaluation import Evaluator + + +def mkdir_if_missing(d): + if not os.path.exists(d): + os.makedirs(d) + + +def eval_mota(data_root, txt_path): + accs = [] + seqs = sorted([s for s in os.listdir(data_root) if s.endswith('FRCNN')]) + #seqs = sorted([s for s in os.listdir(data_root)]) + for seq in seqs: + video_out_path = os.path.join(txt_path, seq + '.txt') + evaluator = Evaluator(data_root, seq, 'mot') + accs.append(evaluator.eval_file(video_out_path)) + metrics = mm.metrics.motchallenge_metrics + mh = mm.metrics.create() + summary = Evaluator.get_summary(accs, seqs, metrics) + strsummary = mm.io.render_summary( + summary, + formatters=mh.formatters, + namemap=mm.io.motchallenge_metric_names + ) + print(strsummary) + + +def get_mota(data_root, txt_path): + accs = [] + seqs = sorted([s for s in os.listdir(data_root) if s.endswith('FRCNN')]) + #seqs = sorted([s for s in os.listdir(data_root)]) + for seq in seqs: + video_out_path = os.path.join(txt_path, seq + '.txt') + evaluator = Evaluator(data_root, seq, 'mot') + accs.append(evaluator.eval_file(video_out_path)) + metrics = mm.metrics.motchallenge_metrics + mh = mm.metrics.create() + summary = Evaluator.get_summary(accs, seqs, metrics) + strsummary = mm.io.render_summary( + summary, + formatters=mh.formatters, + namemap=mm.io.motchallenge_metric_names + ) + mota = float(strsummary.split(' ')[-6][:-1]) + return mota + + +def write_results_score(filename, results): + save_format = '{frame},{id},{x1},{y1},{w},{h},{s},-1,-1,-1\n' + with open(filename, 'w') as f: + for i in range(results.shape[0]): + frame_data = results[i] + frame_id = int(frame_data[0]) + track_id = int(frame_data[1]) + x1, y1, w, h = frame_data[2:6] + score = frame_data[6] + line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, w=w, h=h, s=-1) + f.write(line) + + +def dti(txt_path, save_path, n_min=25, n_dti=20): + seq_txts = sorted(glob.glob(os.path.join(txt_path, '*.txt'))) + for seq_txt in seq_txts: + seq_name = seq_txt.split('/')[-1] + seq_data = np.loadtxt(seq_txt, dtype=np.float64, delimiter=',') + min_id = int(np.min(seq_data[:, 1])) + max_id = int(np.max(seq_data[:, 1])) + seq_results = np.zeros((1, 10), dtype=np.float64) + for track_id in range(min_id, max_id + 1): + index = (seq_data[:, 1] == track_id) + tracklet = seq_data[index] + tracklet_dti = tracklet + if tracklet.shape[0] == 0: + continue + n_frame = tracklet.shape[0] + n_conf = np.sum(tracklet[:, 6] > 0.5) + if n_frame > n_min: + frames = tracklet[:, 0] + frames_dti = {} + for i in range(0, n_frame): + right_frame = frames[i] + if i > 0: + left_frame = frames[i - 1] + else: + left_frame = frames[i] + # disconnected track interpolation + if 1 < right_frame - left_frame < n_dti: + num_bi = int(right_frame - left_frame - 1) + right_bbox = tracklet[i, 2:6] + left_bbox = tracklet[i - 1, 2:6] + for j in range(1, num_bi + 1): + curr_frame = j + left_frame + curr_bbox = 
(curr_frame - left_frame) * (right_bbox - left_bbox) / \ + (right_frame - left_frame) + left_bbox + frames_dti[curr_frame] = curr_bbox + num_dti = len(frames_dti.keys()) + if num_dti > 0: + data_dti = np.zeros((num_dti, 10), dtype=np.float64) + for n in range(num_dti): + data_dti[n, 0] = list(frames_dti.keys())[n] + data_dti[n, 1] = track_id + data_dti[n, 2:6] = frames_dti[list(frames_dti.keys())[n]] + data_dti[n, 6:] = [1, -1, -1, -1] + tracklet_dti = np.vstack((tracklet, data_dti)) + seq_results = np.vstack((seq_results, tracklet_dti)) + save_seq_txt = os.path.join(save_path, seq_name) + seq_results = seq_results[1:] + seq_results = seq_results[seq_results[:, 0].argsort()] + write_results_score(save_seq_txt, seq_results) + + +if __name__ == '__main__': + data_root = '/opt/tiger/demo/ByteTrack/datasets/mot/test' + txt_path = '/opt/tiger/demo/ByteTrack/YOLOX_outputs/yolox_x_mix_det/track_results' + save_path = '/opt/tiger/demo/ByteTrack/YOLOX_outputs/yolox_x_mix_det/track_results_dti' + + mkdir_if_missing(save_path) + dti(txt_path, save_path, n_min=5, n_dti=20) + print('Before DTI: ') + eval_mota(data_root, txt_path) + print('After DTI:') + eval_mota(data_root, save_path) + + ''' + mota_best = 0.0 + best_n_min = 0 + best_n_dti = 0 + for n_min in range(5, 50, 5): + for n_dti in range(5, 30, 5): + dti(txt_path, save_path, n_min, n_dti) + mota = get_mota(data_root, save_path) + if mota > mota_best: + mota_best = mota + best_n_min = n_min + best_n_dti = n_dti + print(mota_best, best_n_min, best_n_dti) + print(mota_best, best_n_min, best_n_dti) + ''' diff --git a/tools/mix_data_ablation.py b/tools/mix_data_ablation.py new file mode 100644 index 0000000000000000000000000000000000000000..b830c691ce52756aac2a8569829297b37ec5147d --- /dev/null +++ b/tools/mix_data_ablation.py @@ -0,0 +1,93 @@ +import json +import os + + +""" +cd datasets +mkdir -p mix_mot_ch/annotations +cp mot/annotations/val_half.json mix_mot_ch/annotations/val_half.json +cp mot/annotations/test.json mix_mot_ch/annotations/test.json +cd mix_mot_ch +ln -s ../mot/train mot_train +ln -s ../crowdhuman/CrowdHuman_train crowdhuman_train +ln -s ../crowdhuman/CrowdHuman_val crowdhuman_val +cd .. 
+""" + +mot_json = json.load(open('datasets/mot/annotations/train_half.json','r')) + +img_list = list() +for img in mot_json['images']: + img['file_name'] = 'mot_train/' + img['file_name'] + img_list.append(img) + +ann_list = list() +for ann in mot_json['annotations']: + ann_list.append(ann) + +video_list = mot_json['videos'] +category_list = mot_json['categories'] + +print('mot17') + +max_img = 10000 +max_ann = 2000000 +max_video = 10 + +crowdhuman_json = json.load(open('datasets/crowdhuman/annotations/train.json','r')) +img_id_count = 0 +for img in crowdhuman_json['images']: + img_id_count += 1 + img['file_name'] = 'crowdhuman_train/' + img['file_name'] + img['frame_id'] = img_id_count + img['prev_image_id'] = img['id'] + max_img + img['next_image_id'] = img['id'] + max_img + img['id'] = img['id'] + max_img + img['video_id'] = max_video + img_list.append(img) + +for ann in crowdhuman_json['annotations']: + ann['id'] = ann['id'] + max_ann + ann['image_id'] = ann['image_id'] + max_img + ann_list.append(ann) + +video_list.append({ + 'id': max_video, + 'file_name': 'crowdhuman_train' +}) + +print('crowdhuman_train') + +max_img = 30000 +max_ann = 10000000 + +crowdhuman_val_json = json.load(open('datasets/crowdhuman/annotations/val.json','r')) +img_id_count = 0 +for img in crowdhuman_val_json['images']: + img_id_count += 1 + img['file_name'] = 'crowdhuman_val/' + img['file_name'] + img['frame_id'] = img_id_count + img['prev_image_id'] = img['id'] + max_img + img['next_image_id'] = img['id'] + max_img + img['id'] = img['id'] + max_img + img['video_id'] = max_video + img_list.append(img) + +for ann in crowdhuman_val_json['annotations']: + ann['id'] = ann['id'] + max_ann + ann['image_id'] = ann['image_id'] + max_img + ann_list.append(ann) + +video_list.append({ + 'id': max_video, + 'file_name': 'crowdhuman_val' +}) + +print('crowdhuman_val') + +mix_json = dict() +mix_json['images'] = img_list +mix_json['annotations'] = ann_list +mix_json['videos'] = video_list +mix_json['categories'] = category_list +json.dump(mix_json, open('datasets/mix_mot_ch/annotations/train.json','w')) \ No newline at end of file diff --git a/tools/mix_data_test_mot17.py b/tools/mix_data_test_mot17.py new file mode 100644 index 0000000000000000000000000000000000000000..b0848db812dfe63e631dd8e35a401d7dbaecd767 --- /dev/null +++ b/tools/mix_data_test_mot17.py @@ -0,0 +1,151 @@ +import json +import os + + +""" +cd datasets +mkdir -p mix_det/annotations +cp mot/annotations/val_half.json mix_det/annotations/val_half.json +cp mot/annotations/test.json mix_det/annotations/test.json +cd mix_det +ln -s ../mot/train mot_train +ln -s ../crowdhuman/CrowdHuman_train crowdhuman_train +ln -s ../crowdhuman/CrowdHuman_val crowdhuman_val +ln -s ../Cityscapes cp_train +ln -s ../ETHZ ethz_train +cd .. 
+""" + +mot_json = json.load(open('datasets/mot/annotations/train_half.json','r')) + +img_list = list() +for img in mot_json['images']: + img['file_name'] = 'mot_train/' + img['file_name'] + img_list.append(img) + +ann_list = list() +for ann in mot_json['annotations']: + ann_list.append(ann) + +video_list = mot_json['videos'] +category_list = mot_json['categories'] + + +print('mot17') + +max_img = 10000 +max_ann = 2000000 +max_video = 10 + +crowdhuman_json = json.load(open('datasets/crowdhuman/annotations/train.json','r')) +img_id_count = 0 +for img in crowdhuman_json['images']: + img_id_count += 1 + img['file_name'] = 'crowdhuman_train/' + img['file_name'] + img['frame_id'] = img_id_count + img['prev_image_id'] = img['id'] + max_img + img['next_image_id'] = img['id'] + max_img + img['id'] = img['id'] + max_img + img['video_id'] = max_video + img_list.append(img) + +for ann in crowdhuman_json['annotations']: + ann['id'] = ann['id'] + max_ann + ann['image_id'] = ann['image_id'] + max_img + ann_list.append(ann) + +print('crowdhuman_train') + +video_list.append({ + 'id': max_video, + 'file_name': 'crowdhuman_train' +}) + + +max_img = 30000 +max_ann = 10000000 + +crowdhuman_val_json = json.load(open('datasets/crowdhuman/annotations/val.json','r')) +img_id_count = 0 +for img in crowdhuman_val_json['images']: + img_id_count += 1 + img['file_name'] = 'crowdhuman_val/' + img['file_name'] + img['frame_id'] = img_id_count + img['prev_image_id'] = img['id'] + max_img + img['next_image_id'] = img['id'] + max_img + img['id'] = img['id'] + max_img + img['video_id'] = max_video + img_list.append(img) + +for ann in crowdhuman_val_json['annotations']: + ann['id'] = ann['id'] + max_ann + ann['image_id'] = ann['image_id'] + max_img + ann_list.append(ann) + +print('crowdhuman_val') + +video_list.append({ + 'id': max_video, + 'file_name': 'crowdhuman_val' +}) + +max_img = 40000 +max_ann = 20000000 + +ethz_json = json.load(open('datasets/ETHZ/annotations/train.json','r')) +img_id_count = 0 +for img in ethz_json['images']: + img_id_count += 1 + img['file_name'] = 'ethz_train/' + img['file_name'][5:] + img['frame_id'] = img_id_count + img['prev_image_id'] = img['id'] + max_img + img['next_image_id'] = img['id'] + max_img + img['id'] = img['id'] + max_img + img['video_id'] = max_video + img_list.append(img) + +for ann in ethz_json['annotations']: + ann['id'] = ann['id'] + max_ann + ann['image_id'] = ann['image_id'] + max_img + ann_list.append(ann) + +print('ETHZ') + +video_list.append({ + 'id': max_video, + 'file_name': 'ethz' +}) + +max_img = 50000 +max_ann = 25000000 + +cp_json = json.load(open('datasets/Cityscapes/annotations/train.json','r')) +img_id_count = 0 +for img in cp_json['images']: + img_id_count += 1 + img['file_name'] = 'cp_train/' + img['file_name'][11:] + img['frame_id'] = img_id_count + img['prev_image_id'] = img['id'] + max_img + img['next_image_id'] = img['id'] + max_img + img['id'] = img['id'] + max_img + img['video_id'] = max_video + img_list.append(img) + +for ann in cp_json['annotations']: + ann['id'] = ann['id'] + max_ann + ann['image_id'] = ann['image_id'] + max_img + ann_list.append(ann) + +print('Cityscapes') + +video_list.append({ + 'id': max_video, + 'file_name': 'cityperson' +}) + +mix_json = dict() +mix_json['images'] = img_list +mix_json['annotations'] = ann_list +mix_json['videos'] = video_list +mix_json['categories'] = category_list +json.dump(mix_json, open('datasets/mix_det/annotations/train.json','w')) \ No newline at end of file diff --git a/tools/mix_data_test_mot20.py 
b/tools/mix_data_test_mot20.py new file mode 100644 index 0000000000000000000000000000000000000000..e7bbafc2156dfc53536f547ed17e744f7cc0513e --- /dev/null +++ b/tools/mix_data_test_mot20.py @@ -0,0 +1,91 @@ +import json +import os + + +""" +cd datasets +mkdir -p mix_mot20_ch/annotations +cp MOT20/annotations/val_half.json mix_mot20_ch/annotations/val_half.json +cp MOT20/annotations/test.json mix_mot20_ch/annotations/test.json +cd mix_mot20_ch +ln -s ../MOT20/train mot20_train +ln -s ../crowdhuman/CrowdHuman_train crowdhuman_train +ln -s ../crowdhuman/CrowdHuman_val crowdhuman_val +cd .. +""" + +mot_json = json.load(open('datasets/MOT20/annotations/train.json','r')) + +img_list = list() +for img in mot_json['images']: + img['file_name'] = 'mot20_train/' + img['file_name'] + img_list.append(img) + +ann_list = list() +for ann in mot_json['annotations']: + ann_list.append(ann) + +video_list = mot_json['videos'] +category_list = mot_json['categories'] + + + + +max_img = 10000 +max_ann = 2000000 +max_video = 10 + +crowdhuman_json = json.load(open('datasets/crowdhuman/annotations/train.json','r')) +img_id_count = 0 +for img in crowdhuman_json['images']: + img_id_count += 1 + img['file_name'] = 'crowdhuman_train/' + img['file_name'] + img['frame_id'] = img_id_count + img['prev_image_id'] = img['id'] + max_img + img['next_image_id'] = img['id'] + max_img + img['id'] = img['id'] + max_img + img['video_id'] = max_video + img_list.append(img) + +for ann in crowdhuman_json['annotations']: + ann['id'] = ann['id'] + max_ann + ann['image_id'] = ann['image_id'] + max_img + ann_list.append(ann) + +video_list.append({ + 'id': max_video, + 'file_name': 'crowdhuman_train' +}) + + +max_img = 30000 +max_ann = 10000000 + +crowdhuman_val_json = json.load(open('datasets/crowdhuman/annotations/val.json','r')) +img_id_count = 0 +for img in crowdhuman_val_json['images']: + img_id_count += 1 + img['file_name'] = 'crowdhuman_val/' + img['file_name'] + img['frame_id'] = img_id_count + img['prev_image_id'] = img['id'] + max_img + img['next_image_id'] = img['id'] + max_img + img['id'] = img['id'] + max_img + img['video_id'] = max_video + img_list.append(img) + +for ann in crowdhuman_val_json['annotations']: + ann['id'] = ann['id'] + max_ann + ann['image_id'] = ann['image_id'] + max_img + ann_list.append(ann) + +video_list.append({ + 'id': max_video, + 'file_name': 'crowdhuman_val' +}) + +mix_json = dict() +mix_json['images'] = img_list +mix_json['annotations'] = ann_list +mix_json['videos'] = video_list +mix_json['categories'] = category_list +json.dump(mix_json, open('datasets/mix_mot20_ch/annotations/train.json','w')) \ No newline at end of file diff --git a/tools/mota.py b/tools/mota.py new file mode 100644 index 0000000000000000000000000000000000000000..29608a91999680e20d003c8443afc4ba35e9196a --- /dev/null +++ b/tools/mota.py @@ -0,0 +1,84 @@ +from loguru import logger + +import torch +import torch.backends.cudnn as cudnn +from torch.nn.parallel import DistributedDataParallel as DDP + +from yolox.core import launch +from yolox.exp import get_exp +from yolox.utils import configure_nccl, fuse_model, get_local_rank, get_model_info, setup_logger +from yolox.evaluators import MOTEvaluator + +import argparse +import os +import random +import warnings +import glob +import motmetrics as mm +from collections import OrderedDict +from pathlib import Path + + +def compare_dataframes(gts, ts): + accs = [] + names = [] + for k, tsacc in ts.items(): + if k in gts: + logger.info('Comparing {}...'.format(k)) + 
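+            # compare_to_groundtruth matches tracker output to ground truth frame by
+            # frame using IoU distance (1 - IoU); pairs with IoU below 0.5 stay unmatched.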
accs.append(mm.utils.compare_to_groundtruth(gts[k], tsacc, 'iou', distth=0.5)) + names.append(k) + else: + logger.warning('No ground truth for {}, skipping.'.format(k)) + + return accs, names + + +# evaluate MOTA +results_folder = 'YOLOX_outputs/yolox_x_ablation/track_results' +mm.lap.default_solver = 'lap' + +gt_type = '_val_half' +#gt_type = '' +print('gt_type', gt_type) +gtfiles = glob.glob( + os.path.join('datasets/mot/train', '*/gt/gt{}.txt'.format(gt_type))) +print('gt_files', gtfiles) +tsfiles = [f for f in glob.glob(os.path.join(results_folder, '*.txt')) if not os.path.basename(f).startswith('eval')] + +logger.info('Found {} groundtruths and {} test files.'.format(len(gtfiles), len(tsfiles))) +logger.info('Available LAP solvers {}'.format(mm.lap.available_solvers)) +logger.info('Default LAP solver \'{}\''.format(mm.lap.default_solver)) +logger.info('Loading files.') + +gt = OrderedDict([(Path(f).parts[-3], mm.io.loadtxt(f, fmt='mot15-2D', min_confidence=1)) for f in gtfiles]) +ts = OrderedDict([(os.path.splitext(Path(f).parts[-1])[0], mm.io.loadtxt(f, fmt='mot15-2D', min_confidence=0.6)) for f in tsfiles]) + +mh = mm.metrics.create() +accs, names = compare_dataframes(gt, ts) + +logger.info('Running metrics') +metrics = ['recall', 'precision', 'num_unique_objects', 'mostly_tracked', + 'partially_tracked', 'mostly_lost', 'num_false_positives', 'num_misses', + 'num_switches', 'num_fragmentations', 'mota', 'motp', 'num_objects'] +summary = mh.compute_many(accs, names=names, metrics=metrics, generate_overall=True) +# summary = mh.compute_many(accs, names=names, metrics=mm.metrics.motchallenge_metrics, generate_overall=True) +# print(mm.io.render_summary( +# summary, formatters=mh.formatters, +# namemap=mm.io.motchallenge_metric_names)) +div_dict = { + 'num_objects': ['num_false_positives', 'num_misses', 'num_switches', 'num_fragmentations'], + 'num_unique_objects': ['mostly_tracked', 'partially_tracked', 'mostly_lost']} +for divisor in div_dict: + for divided in div_dict[divisor]: + summary[divided] = (summary[divided] / summary[divisor]) +fmt = mh.formatters +change_fmt_list = ['num_false_positives', 'num_misses', 'num_switches', 'num_fragmentations', 'mostly_tracked', + 'partially_tracked', 'mostly_lost'] +for k in change_fmt_list: + fmt[k] = fmt['mota'] +print(mm.io.render_summary(summary, formatters=fmt, namemap=mm.io.motchallenge_metric_names)) + +metrics = mm.metrics.motchallenge_metrics + ['num_objects'] +summary = mh.compute_many(accs, names=names, metrics=metrics, generate_overall=True) +print(mm.io.render_summary(summary, formatters=mh.formatters, namemap=mm.io.motchallenge_metric_names)) +logger.info('Completed') \ No newline at end of file diff --git a/tools/track.py b/tools/track.py new file mode 100644 index 0000000000000000000000000000000000000000..ee7769a543b417f84968301153e8d6d0d2d659a0 --- /dev/null +++ b/tools/track.py @@ -0,0 +1,293 @@ +from loguru import logger + +import torch +import torch.backends.cudnn as cudnn +from torch.nn.parallel import DistributedDataParallel as DDP + +from yolox.core import launch +from yolox.exp import get_exp +from yolox.utils import configure_nccl, fuse_model, get_local_rank, get_model_info, setup_logger +from yolox.evaluators import MOTEvaluator + +import argparse +import os +import random +import warnings +import glob +import motmetrics as mm +from collections import OrderedDict +from pathlib import Path + + +def make_parser(): + parser = argparse.ArgumentParser("YOLOX Eval") + parser.add_argument("-expn", "--experiment-name", 
type=str, default=None) + parser.add_argument("-n", "--name", type=str, default=None, help="model name") + + # distributed + parser.add_argument( + "--dist-backend", default="nccl", type=str, help="distributed backend" + ) + parser.add_argument( + "--dist-url", + default=None, + type=str, + help="url used to set up distributed training", + ) + parser.add_argument("-b", "--batch-size", type=int, default=64, help="batch size") + parser.add_argument( + "-d", "--devices", default=None, type=int, help="device for training" + ) + parser.add_argument( + "--local_rank", default=0, type=int, help="local rank for dist training" + ) + parser.add_argument( + "--num_machines", default=1, type=int, help="num of node for training" + ) + parser.add_argument( + "--machine_rank", default=0, type=int, help="node rank for multi-node training" + ) + parser.add_argument( + "-f", + "--exp_file", + default=None, + type=str, + help="pls input your expriment description file", + ) + parser.add_argument( + "--fp16", + dest="fp16", + default=False, + action="store_true", + help="Adopting mix precision evaluating.", + ) + parser.add_argument( + "--fuse", + dest="fuse", + default=False, + action="store_true", + help="Fuse conv and bn for testing.", + ) + parser.add_argument( + "--trt", + dest="trt", + default=False, + action="store_true", + help="Using TensorRT model for testing.", + ) + parser.add_argument( + "--test", + dest="test", + default=False, + action="store_true", + help="Evaluating on test-dev set.", + ) + parser.add_argument( + "--speed", + dest="speed", + default=False, + action="store_true", + help="speed test only.", + ) + parser.add_argument( + "opts", + help="Modify config options using the command-line", + default=None, + nargs=argparse.REMAINDER, + ) + # det args + parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt for eval") + parser.add_argument("--conf", default=0.01, type=float, help="test conf") + parser.add_argument("--nms", default=0.7, type=float, help="test nms threshold") + parser.add_argument("--tsize", default=None, type=int, help="test img size") + parser.add_argument("--seed", default=None, type=int, help="eval seed") + # tracking args + parser.add_argument("--track_thresh", type=float, default=0.6, help="tracking confidence threshold") + parser.add_argument("--track_buffer", type=int, default=30, help="the frames for keep lost tracks") + parser.add_argument("--match_thresh", type=float, default=0.9, help="matching threshold for tracking") + parser.add_argument("--min-box-area", type=float, default=100, help='filter out tiny boxes') + parser.add_argument("--mot20", dest="mot20", default=False, action="store_true", help="test mot20.") + return parser + + +def compare_dataframes(gts, ts): + accs = [] + names = [] + for k, tsacc in ts.items(): + if k in gts: + logger.info('Comparing {}...'.format(k)) + accs.append(mm.utils.compare_to_groundtruth(gts[k], tsacc, 'iou', distth=0.5)) + names.append(k) + else: + logger.warning('No ground truth for {}, skipping.'.format(k)) + + return accs, names + + +@logger.catch +def main(exp, args, num_gpu): + if args.seed is not None: + random.seed(args.seed) + torch.manual_seed(args.seed) + cudnn.deterministic = True + warnings.warn( + "You have chosen to seed testing. 
This will turn on the CUDNN deterministic setting, " + ) + + is_distributed = num_gpu > 1 + + # set environment variables for distributed training + cudnn.benchmark = True + + rank = args.local_rank + # rank = get_local_rank() + + file_name = os.path.join(exp.output_dir, args.experiment_name) + + if rank == 0: + os.makedirs(file_name, exist_ok=True) + + results_folder = os.path.join(file_name, "track_results") + os.makedirs(results_folder, exist_ok=True) + + setup_logger(file_name, distributed_rank=rank, filename="val_log.txt", mode="a") + logger.info("Args: {}".format(args)) + + if args.conf is not None: + exp.test_conf = args.conf + if args.nms is not None: + exp.nmsthre = args.nms + if args.tsize is not None: + exp.test_size = (args.tsize, args.tsize) + + model = exp.get_model() + logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size))) + #logger.info("Model Structure:\n{}".format(str(model))) + + val_loader = exp.get_eval_loader(args.batch_size, is_distributed, args.test) + evaluator = MOTEvaluator( + args=args, + dataloader=val_loader, + img_size=exp.test_size, + confthre=exp.test_conf, + nmsthre=exp.nmsthre, + num_classes=exp.num_classes, + ) + + torch.cuda.set_device(rank) + model.cuda(rank) + model.eval() + + if not args.speed and not args.trt: + if args.ckpt is None: + ckpt_file = os.path.join(file_name, "best_ckpt.pth.tar") + else: + ckpt_file = args.ckpt + logger.info("loading checkpoint") + loc = "cuda:{}".format(rank) + ckpt = torch.load(ckpt_file, map_location=loc) + # load the model state dict + model.load_state_dict(ckpt["model"]) + logger.info("loaded checkpoint done.") + + if is_distributed: + model = DDP(model, device_ids=[rank]) + + if args.fuse: + logger.info("\tFusing model...") + model = fuse_model(model) + + if args.trt: + assert ( + not args.fuse and not is_distributed and args.batch_size == 1 + ), "TensorRT model is not support model fusing and distributed inferencing!" + trt_file = os.path.join(file_name, "model_trt.pth") + assert os.path.exists( + trt_file + ), "TensorRT model is not found!\n Run tools/trt.py first!" 
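+        # The serialized TensorRT module written by tools/trt.py is picked up through
+        # trt_file; since the engine was exported with in-graph decoding disabled, the
+        # raw head outputs are decoded on the Python side via model.head.decode_outputs.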
+ model.head.decode_in_inference = False + decoder = model.head.decode_outputs + else: + trt_file = None + decoder = None + + # start evaluate + *_, summary = evaluator.evaluate( + model, is_distributed, args.fp16, trt_file, decoder, exp.test_size, results_folder + ) + logger.info("\n" + summary) + + # evaluate MOTA + mm.lap.default_solver = 'lap' + + if exp.val_ann == 'val_half.json': + gt_type = '_val_half' + else: + gt_type = '' + print('gt_type', gt_type) + if args.mot20: + gtfiles = glob.glob(os.path.join('datasets/MOT20/train', '*/gt/gt{}.txt'.format(gt_type))) + else: + gtfiles = glob.glob(os.path.join('datasets/mot/train', '*/gt/gt{}.txt'.format(gt_type))) + print('gt_files', gtfiles) + tsfiles = [f for f in glob.glob(os.path.join(results_folder, '*.txt')) if not os.path.basename(f).startswith('eval')] + + logger.info('Found {} groundtruths and {} test files.'.format(len(gtfiles), len(tsfiles))) + logger.info('Available LAP solvers {}'.format(mm.lap.available_solvers)) + logger.info('Default LAP solver \'{}\''.format(mm.lap.default_solver)) + logger.info('Loading files.') + + gt = OrderedDict([(Path(f).parts[-3], mm.io.loadtxt(f, fmt='mot15-2D', min_confidence=1)) for f in gtfiles]) + ts = OrderedDict([(os.path.splitext(Path(f).parts[-1])[0], mm.io.loadtxt(f, fmt='mot15-2D', min_confidence=-1)) for f in tsfiles]) + + mh = mm.metrics.create() + accs, names = compare_dataframes(gt, ts) + + logger.info('Running metrics') + metrics = ['recall', 'precision', 'num_unique_objects', 'mostly_tracked', + 'partially_tracked', 'mostly_lost', 'num_false_positives', 'num_misses', + 'num_switches', 'num_fragmentations', 'mota', 'motp', 'num_objects'] + summary = mh.compute_many(accs, names=names, metrics=metrics, generate_overall=True) + # summary = mh.compute_many(accs, names=names, metrics=mm.metrics.motchallenge_metrics, generate_overall=True) + # print(mm.io.render_summary( + # summary, formatters=mh.formatters, + # namemap=mm.io.motchallenge_metric_names)) + div_dict = { + 'num_objects': ['num_false_positives', 'num_misses', 'num_switches', 'num_fragmentations'], + 'num_unique_objects': ['mostly_tracked', 'partially_tracked', 'mostly_lost']} + for divisor in div_dict: + for divided in div_dict[divisor]: + summary[divided] = (summary[divided] / summary[divisor]) + fmt = mh.formatters + change_fmt_list = ['num_false_positives', 'num_misses', 'num_switches', 'num_fragmentations', 'mostly_tracked', + 'partially_tracked', 'mostly_lost'] + for k in change_fmt_list: + fmt[k] = fmt['mota'] + print(mm.io.render_summary(summary, formatters=fmt, namemap=mm.io.motchallenge_metric_names)) + + metrics = mm.metrics.motchallenge_metrics + ['num_objects'] + summary = mh.compute_many(accs, names=names, metrics=metrics, generate_overall=True) + print(mm.io.render_summary(summary, formatters=mh.formatters, namemap=mm.io.motchallenge_metric_names)) + logger.info('Completed') + + +if __name__ == "__main__": + args = make_parser().parse_args() + exp = get_exp(args.exp_file, args.name) + exp.merge(args.opts) + + if not args.experiment_name: + args.experiment_name = exp.exp_name + + num_gpu = torch.cuda.device_count() if args.devices is None else args.devices + assert num_gpu <= torch.cuda.device_count() + + launch( + main, + num_gpu, + args.num_machines, + args.machine_rank, + backend=args.dist_backend, + dist_url=args.dist_url, + args=(exp, args, num_gpu), + ) diff --git a/tools/track_deepsort.py b/tools/track_deepsort.py new file mode 100644 index 
0000000000000000000000000000000000000000..06f4106858754ad80fe51356a67da5665ebcf92d --- /dev/null +++ b/tools/track_deepsort.py @@ -0,0 +1,293 @@ +from loguru import logger + +import torch +import torch.backends.cudnn as cudnn +from torch.nn.parallel import DistributedDataParallel as DDP + +from yolox.core import launch +from yolox.exp import get_exp +from yolox.utils import configure_nccl, fuse_model, get_local_rank, get_model_info, setup_logger +from yolox.evaluators import MOTEvaluator + +import argparse +import os +import random +import warnings +import glob +import motmetrics as mm +from collections import OrderedDict +from pathlib import Path + + +def make_parser(): + parser = argparse.ArgumentParser("YOLOX Eval") + parser.add_argument("-expn", "--experiment-name", type=str, default=None) + parser.add_argument("-n", "--name", type=str, default=None, help="model name") + + # distributed + parser.add_argument( + "--dist-backend", default="nccl", type=str, help="distributed backend" + ) + parser.add_argument( + "--dist-url", + default=None, + type=str, + help="url used to set up distributed training", + ) + parser.add_argument("-b", "--batch-size", type=int, default=64, help="batch size") + parser.add_argument( + "-d", "--devices", default=None, type=int, help="device for training" + ) + parser.add_argument( + "--local_rank", default=0, type=int, help="local rank for dist training" + ) + parser.add_argument( + "--num_machines", default=1, type=int, help="num of node for training" + ) + parser.add_argument( + "--machine_rank", default=0, type=int, help="node rank for multi-node training" + ) + parser.add_argument( + "-f", + "--exp_file", + default=None, + type=str, + help="pls input your expriment description file", + ) + parser.add_argument( + "--fp16", + dest="fp16", + default=False, + action="store_true", + help="Adopting mix precision evaluating.", + ) + parser.add_argument( + "--fuse", + dest="fuse", + default=False, + action="store_true", + help="Fuse conv and bn for testing.", + ) + parser.add_argument( + "--trt", + dest="trt", + default=False, + action="store_true", + help="Using TensorRT model for testing.", + ) + parser.add_argument( + "--test", + dest="test", + default=False, + action="store_true", + help="Evaluating on test-dev set.", + ) + parser.add_argument( + "--speed", + dest="speed", + default=False, + action="store_true", + help="speed test only.", + ) + parser.add_argument( + "opts", + help="Modify config options using the command-line", + default=None, + nargs=argparse.REMAINDER, + ) + # det args + parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt for eval") + parser.add_argument("--conf", default=0.1, type=float, help="test conf") + parser.add_argument("--nms", default=0.7, type=float, help="test nms threshold") + parser.add_argument("--tsize", default=None, type=int, help="test img size") + parser.add_argument("--seed", default=None, type=int, help="eval seed") + # tracking args + parser.add_argument("--track_thresh", type=float, default=0.6, help="tracking confidence threshold") + parser.add_argument("--track_buffer", type=int, default=30, help="the frames for keep lost tracks") + parser.add_argument("--match_thresh", type=int, default=0.9, help="matching threshold for tracking") + parser.add_argument('--min-box-area', type=float, default=100, help='filter out tiny boxes') + # deepsort args + parser.add_argument("--model_folder", type=str, default='pretrained/ckpt.t7', help="reid model folder") + return parser + + +def compare_dataframes(gts, 
ts): + accs = [] + names = [] + for k, tsacc in ts.items(): + if k in gts: + logger.info('Comparing {}...'.format(k)) + accs.append(mm.utils.compare_to_groundtruth(gts[k], tsacc, 'iou', distth=0.5)) + names.append(k) + else: + logger.warning('No ground truth for {}, skipping.'.format(k)) + + return accs, names + + +@logger.catch +def main(exp, args, num_gpu): + if args.seed is not None: + random.seed(args.seed) + torch.manual_seed(args.seed) + cudnn.deterministic = True + warnings.warn( + "You have chosen to seed testing. This will turn on the CUDNN deterministic setting, " + ) + + is_distributed = num_gpu > 1 + + # set environment variables for distributed training + cudnn.benchmark = True + + rank = args.local_rank + # rank = get_local_rank() + + file_name = os.path.join(exp.output_dir, args.experiment_name) + + if rank == 0: + os.makedirs(file_name, exist_ok=True) + + results_folder = os.path.join(file_name, "track_results_deepsort") + os.makedirs(results_folder, exist_ok=True) + model_folder = args.model_folder + + setup_logger(file_name, distributed_rank=rank, filename="val_log.txt", mode="a") + logger.info("Args: {}".format(args)) + + if args.conf is not None: + exp.test_conf = args.conf + if args.nms is not None: + exp.nmsthre = args.nms + if args.tsize is not None: + exp.test_size = (args.tsize, args.tsize) + + model = exp.get_model() + logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size))) + #logger.info("Model Structure:\n{}".format(str(model))) + + #evaluator = exp.get_evaluator(args.batch_size, is_distributed, args.test) + + val_loader = exp.get_eval_loader(args.batch_size, is_distributed, args.test) + evaluator = MOTEvaluator( + args=args, + dataloader=val_loader, + img_size=exp.test_size, + confthre=exp.test_conf, + nmsthre=exp.nmsthre, + num_classes=exp.num_classes, + ) + + torch.cuda.set_device(rank) + model.cuda(rank) + model.eval() + + if not args.speed and not args.trt: + if args.ckpt is None: + ckpt_file = os.path.join(file_name, "best_ckpt.pth.tar") + else: + ckpt_file = args.ckpt + logger.info("loading checkpoint") + loc = "cuda:{}".format(rank) + ckpt = torch.load(ckpt_file, map_location=loc) + # load the model state dict + model.load_state_dict(ckpt["model"]) + logger.info("loaded checkpoint done.") + + if is_distributed: + model = DDP(model, device_ids=[rank]) + + if args.fuse: + logger.info("\tFusing model...") + model = fuse_model(model) + + if args.trt: + assert ( + not args.fuse and not is_distributed and args.batch_size == 1 + ), "TensorRT model is not support model fusing and distributed inferencing!" + trt_file = os.path.join(file_name, "model_trt.pth") + assert os.path.exists( + trt_file + ), "TensorRT model is not found!\n Run tools/trt.py first!" 
+ model.head.decode_in_inference = False + decoder = model.head.decode_outputs + else: + trt_file = None + decoder = None + + # start evaluate + *_, summary = evaluator.evaluate_deepsort( + model, is_distributed, args.fp16, trt_file, decoder, exp.test_size, results_folder, model_folder + ) + logger.info("\n" + summary) + + # evaluate MOTA + mm.lap.default_solver = 'lap' + + gt_type = '_val_half' + #gt_type = '' + print('gt_type', gt_type) + gtfiles = glob.glob( + os.path.join('datasets/mot/train', '*/gt/gt{}.txt'.format(gt_type))) + print('gt_files', gtfiles) + tsfiles = [f for f in glob.glob(os.path.join(results_folder, '*.txt')) if not os.path.basename(f).startswith('eval')] + + logger.info('Found {} groundtruths and {} test files.'.format(len(gtfiles), len(tsfiles))) + logger.info('Available LAP solvers {}'.format(mm.lap.available_solvers)) + logger.info('Default LAP solver \'{}\''.format(mm.lap.default_solver)) + logger.info('Loading files.') + + gt = OrderedDict([(Path(f).parts[-3], mm.io.loadtxt(f, fmt='mot15-2D', min_confidence=1)) for f in gtfiles]) + ts = OrderedDict([(os.path.splitext(Path(f).parts[-1])[0], mm.io.loadtxt(f, fmt='mot15-2D', min_confidence=-1)) for f in tsfiles]) + + mh = mm.metrics.create() + accs, names = compare_dataframes(gt, ts) + + logger.info('Running metrics') + metrics = ['recall', 'precision', 'num_unique_objects', 'mostly_tracked', + 'partially_tracked', 'mostly_lost', 'num_false_positives', 'num_misses', + 'num_switches', 'num_fragmentations', 'mota', 'motp', 'num_objects'] + summary = mh.compute_many(accs, names=names, metrics=metrics, generate_overall=True) + # summary = mh.compute_many(accs, names=names, metrics=mm.metrics.motchallenge_metrics, generate_overall=True) + # print(mm.io.render_summary( + # summary, formatters=mh.formatters, + # namemap=mm.io.motchallenge_metric_names)) + div_dict = { + 'num_objects': ['num_false_positives', 'num_misses', 'num_switches', 'num_fragmentations'], + 'num_unique_objects': ['mostly_tracked', 'partially_tracked', 'mostly_lost']} + for divisor in div_dict: + for divided in div_dict[divisor]: + summary[divided] = (summary[divided] / summary[divisor]) + fmt = mh.formatters + change_fmt_list = ['num_false_positives', 'num_misses', 'num_switches', 'num_fragmentations', 'mostly_tracked', + 'partially_tracked', 'mostly_lost'] + for k in change_fmt_list: + fmt[k] = fmt['mota'] + print(mm.io.render_summary(summary, formatters=fmt, namemap=mm.io.motchallenge_metric_names)) + + metrics = mm.metrics.motchallenge_metrics + ['num_objects'] + summary = mh.compute_many(accs, names=names, metrics=metrics, generate_overall=True) + print(mm.io.render_summary(summary, formatters=mh.formatters, namemap=mm.io.motchallenge_metric_names)) + logger.info('Completed') + + +if __name__ == "__main__": + args = make_parser().parse_args() + exp = get_exp(args.exp_file, args.name) + exp.merge(args.opts) + + if not args.experiment_name: + args.experiment_name = exp.exp_name + + num_gpu = torch.cuda.device_count() if args.devices is None else args.devices + assert num_gpu <= torch.cuda.device_count() + + launch( + main, + num_gpu, + args.num_machines, + args.machine_rank, + backend=args.dist_backend, + dist_url=args.dist_url, + args=(exp, args, num_gpu), + ) diff --git a/tools/track_motdt.py b/tools/track_motdt.py new file mode 100644 index 0000000000000000000000000000000000000000..303815dca938c66147ac0cfd301bb7bb11e240ae --- /dev/null +++ b/tools/track_motdt.py @@ -0,0 +1,293 @@ +from loguru import logger + +import torch +import 
torch.backends.cudnn as cudnn +from torch.nn.parallel import DistributedDataParallel as DDP + +from yolox.core import launch +from yolox.exp import get_exp +from yolox.utils import configure_nccl, fuse_model, get_local_rank, get_model_info, setup_logger +from yolox.evaluators import MOTEvaluator + +import argparse +import os +import random +import warnings +import glob +import motmetrics as mm +from collections import OrderedDict +from pathlib import Path + + +def make_parser(): + parser = argparse.ArgumentParser("YOLOX Eval") + parser.add_argument("-expn", "--experiment-name", type=str, default=None) + parser.add_argument("-n", "--name", type=str, default=None, help="model name") + + # distributed + parser.add_argument( + "--dist-backend", default="nccl", type=str, help="distributed backend" + ) + parser.add_argument( + "--dist-url", + default=None, + type=str, + help="url used to set up distributed training", + ) + parser.add_argument("-b", "--batch-size", type=int, default=64, help="batch size") + parser.add_argument( + "-d", "--devices", default=None, type=int, help="device for training" + ) + parser.add_argument( + "--local_rank", default=0, type=int, help="local rank for dist training" + ) + parser.add_argument( + "--num_machines", default=1, type=int, help="num of node for training" + ) + parser.add_argument( + "--machine_rank", default=0, type=int, help="node rank for multi-node training" + ) + parser.add_argument( + "-f", + "--exp_file", + default=None, + type=str, + help="pls input your expriment description file", + ) + parser.add_argument( + "--fp16", + dest="fp16", + default=False, + action="store_true", + help="Adopting mix precision evaluating.", + ) + parser.add_argument( + "--fuse", + dest="fuse", + default=False, + action="store_true", + help="Fuse conv and bn for testing.", + ) + parser.add_argument( + "--trt", + dest="trt", + default=False, + action="store_true", + help="Using TensorRT model for testing.", + ) + parser.add_argument( + "--test", + dest="test", + default=False, + action="store_true", + help="Evaluating on test-dev set.", + ) + parser.add_argument( + "--speed", + dest="speed", + default=False, + action="store_true", + help="speed test only.", + ) + parser.add_argument( + "opts", + help="Modify config options using the command-line", + default=None, + nargs=argparse.REMAINDER, + ) + # det args + parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt for eval") + parser.add_argument("--conf", default=0.1, type=float, help="test conf") + parser.add_argument("--nms", default=0.7, type=float, help="test nms threshold") + parser.add_argument("--tsize", default=None, type=int, help="test img size") + parser.add_argument("--seed", default=None, type=int, help="eval seed") + # tracking args + parser.add_argument("--track_thresh", type=float, default=0.6, help="tracking confidence threshold") + parser.add_argument("--track_buffer", type=int, default=30, help="the frames for keep lost tracks") + parser.add_argument("--match_thresh", type=int, default=0.9, help="matching threshold for tracking") + parser.add_argument('--min-box-area', type=float, default=100, help='filter out tiny boxes') + # deepsort args + parser.add_argument("--model_folder", type=str, default='pretrained/googlenet_part8_all_xavier_ckpt_56.h5', help="reid model folder") + return parser + + +def compare_dataframes(gts, ts): + accs = [] + names = [] + for k, tsacc in ts.items(): + if k in gts: + logger.info('Comparing {}...'.format(k)) + 
accs.append(mm.utils.compare_to_groundtruth(gts[k], tsacc, 'iou', distth=0.5)) + names.append(k) + else: + logger.warning('No ground truth for {}, skipping.'.format(k)) + + return accs, names + + +@logger.catch +def main(exp, args, num_gpu): + if args.seed is not None: + random.seed(args.seed) + torch.manual_seed(args.seed) + cudnn.deterministic = True + warnings.warn( + "You have chosen to seed testing. This will turn on the CUDNN deterministic setting, " + ) + + is_distributed = num_gpu > 1 + + # set environment variables for distributed training + cudnn.benchmark = True + + rank = args.local_rank + # rank = get_local_rank() + + file_name = os.path.join(exp.output_dir, args.experiment_name) + + if rank == 0: + os.makedirs(file_name, exist_ok=True) + + results_folder = os.path.join(file_name, "track_results_motdt") + os.makedirs(results_folder, exist_ok=True) + model_folder = args.model_folder + + setup_logger(file_name, distributed_rank=rank, filename="val_log.txt", mode="a") + logger.info("Args: {}".format(args)) + + if args.conf is not None: + exp.test_conf = args.conf + if args.nms is not None: + exp.nmsthre = args.nms + if args.tsize is not None: + exp.test_size = (args.tsize, args.tsize) + + model = exp.get_model() + logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size))) + #logger.info("Model Structure:\n{}".format(str(model))) + + #evaluator = exp.get_evaluator(args.batch_size, is_distributed, args.test) + + val_loader = exp.get_eval_loader(args.batch_size, is_distributed, args.test) + evaluator = MOTEvaluator( + args=args, + dataloader=val_loader, + img_size=exp.test_size, + confthre=exp.test_conf, + nmsthre=exp.nmsthre, + num_classes=exp.num_classes, + ) + + torch.cuda.set_device(rank) + model.cuda(rank) + model.eval() + + if not args.speed and not args.trt: + if args.ckpt is None: + ckpt_file = os.path.join(file_name, "best_ckpt.pth.tar") + else: + ckpt_file = args.ckpt + logger.info("loading checkpoint") + loc = "cuda:{}".format(rank) + ckpt = torch.load(ckpt_file, map_location=loc) + # load the model state dict + model.load_state_dict(ckpt["model"]) + logger.info("loaded checkpoint done.") + + if is_distributed: + model = DDP(model, device_ids=[rank]) + + if args.fuse: + logger.info("\tFusing model...") + model = fuse_model(model) + + if args.trt: + assert ( + not args.fuse and not is_distributed and args.batch_size == 1 + ), "TensorRT model is not support model fusing and distributed inferencing!" + trt_file = os.path.join(file_name, "model_trt.pth") + assert os.path.exists( + trt_file + ), "TensorRT model is not found!\n Run tools/trt.py first!" 
+ model.head.decode_in_inference = False + decoder = model.head.decode_outputs + else: + trt_file = None + decoder = None + + # start evaluate + *_, summary = evaluator.evaluate_motdt( + model, is_distributed, args.fp16, trt_file, decoder, exp.test_size, results_folder, model_folder + ) + logger.info("\n" + summary) + + # evaluate MOTA + mm.lap.default_solver = 'lap' + + gt_type = '_val_half' + #gt_type = '' + print('gt_type', gt_type) + gtfiles = glob.glob( + os.path.join('datasets/mot/train', '*/gt/gt{}.txt'.format(gt_type))) + print('gt_files', gtfiles) + tsfiles = [f for f in glob.glob(os.path.join(results_folder, '*.txt')) if not os.path.basename(f).startswith('eval')] + + logger.info('Found {} groundtruths and {} test files.'.format(len(gtfiles), len(tsfiles))) + logger.info('Available LAP solvers {}'.format(mm.lap.available_solvers)) + logger.info('Default LAP solver \'{}\''.format(mm.lap.default_solver)) + logger.info('Loading files.') + + gt = OrderedDict([(Path(f).parts[-3], mm.io.loadtxt(f, fmt='mot15-2D', min_confidence=1)) for f in gtfiles]) + ts = OrderedDict([(os.path.splitext(Path(f).parts[-1])[0], mm.io.loadtxt(f, fmt='mot15-2D', min_confidence=-1)) for f in tsfiles]) + + mh = mm.metrics.create() + accs, names = compare_dataframes(gt, ts) + + logger.info('Running metrics') + metrics = ['recall', 'precision', 'num_unique_objects', 'mostly_tracked', + 'partially_tracked', 'mostly_lost', 'num_false_positives', 'num_misses', + 'num_switches', 'num_fragmentations', 'mota', 'motp', 'num_objects'] + summary = mh.compute_many(accs, names=names, metrics=metrics, generate_overall=True) + # summary = mh.compute_many(accs, names=names, metrics=mm.metrics.motchallenge_metrics, generate_overall=True) + # print(mm.io.render_summary( + # summary, formatters=mh.formatters, + # namemap=mm.io.motchallenge_metric_names)) + div_dict = { + 'num_objects': ['num_false_positives', 'num_misses', 'num_switches', 'num_fragmentations'], + 'num_unique_objects': ['mostly_tracked', 'partially_tracked', 'mostly_lost']} + for divisor in div_dict: + for divided in div_dict[divisor]: + summary[divided] = (summary[divided] / summary[divisor]) + fmt = mh.formatters + change_fmt_list = ['num_false_positives', 'num_misses', 'num_switches', 'num_fragmentations', 'mostly_tracked', + 'partially_tracked', 'mostly_lost'] + for k in change_fmt_list: + fmt[k] = fmt['mota'] + print(mm.io.render_summary(summary, formatters=fmt, namemap=mm.io.motchallenge_metric_names)) + + metrics = mm.metrics.motchallenge_metrics + ['num_objects'] + summary = mh.compute_many(accs, names=names, metrics=metrics, generate_overall=True) + print(mm.io.render_summary(summary, formatters=mh.formatters, namemap=mm.io.motchallenge_metric_names)) + logger.info('Completed') + + +if __name__ == "__main__": + args = make_parser().parse_args() + exp = get_exp(args.exp_file, args.name) + exp.merge(args.opts) + + if not args.experiment_name: + args.experiment_name = exp.exp_name + + num_gpu = torch.cuda.device_count() if args.devices is None else args.devices + assert num_gpu <= torch.cuda.device_count() + + launch( + main, + num_gpu, + args.num_machines, + args.machine_rank, + backend=args.dist_backend, + dist_url=args.dist_url, + args=(exp, args, num_gpu), + ) diff --git a/tools/track_sort.py b/tools/track_sort.py new file mode 100644 index 0000000000000000000000000000000000000000..7a50527d30558918f121e75402ad8ea44093c5ec --- /dev/null +++ b/tools/track_sort.py @@ -0,0 +1,290 @@ +from loguru import logger + +import torch +import torch.backends.cudnn 
as cudnn +from torch.nn.parallel import DistributedDataParallel as DDP + +from yolox.core import launch +from yolox.exp import get_exp +from yolox.utils import configure_nccl, fuse_model, get_local_rank, get_model_info, setup_logger +from yolox.evaluators import MOTEvaluator + +import argparse +import os +import random +import warnings +import glob +import motmetrics as mm +from collections import OrderedDict +from pathlib import Path + + +def make_parser(): + parser = argparse.ArgumentParser("YOLOX Eval") + parser.add_argument("-expn", "--experiment-name", type=str, default=None) + parser.add_argument("-n", "--name", type=str, default=None, help="model name") + + # distributed + parser.add_argument( + "--dist-backend", default="nccl", type=str, help="distributed backend" + ) + parser.add_argument( + "--dist-url", + default=None, + type=str, + help="url used to set up distributed training", + ) + parser.add_argument("-b", "--batch-size", type=int, default=64, help="batch size") + parser.add_argument( + "-d", "--devices", default=None, type=int, help="device for training" + ) + parser.add_argument( + "--local_rank", default=0, type=int, help="local rank for dist training" + ) + parser.add_argument( + "--num_machines", default=1, type=int, help="num of node for training" + ) + parser.add_argument( + "--machine_rank", default=0, type=int, help="node rank for multi-node training" + ) + parser.add_argument( + "-f", + "--exp_file", + default=None, + type=str, + help="pls input your expriment description file", + ) + parser.add_argument( + "--fp16", + dest="fp16", + default=False, + action="store_true", + help="Adopting mix precision evaluating.", + ) + parser.add_argument( + "--fuse", + dest="fuse", + default=False, + action="store_true", + help="Fuse conv and bn for testing.", + ) + parser.add_argument( + "--trt", + dest="trt", + default=False, + action="store_true", + help="Using TensorRT model for testing.", + ) + parser.add_argument( + "--test", + dest="test", + default=False, + action="store_true", + help="Evaluating on test-dev set.", + ) + parser.add_argument( + "--speed", + dest="speed", + default=False, + action="store_true", + help="speed test only.", + ) + parser.add_argument( + "opts", + help="Modify config options using the command-line", + default=None, + nargs=argparse.REMAINDER, + ) + # det args + parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt for eval") + parser.add_argument("--conf", default=0.1, type=float, help="test conf") + parser.add_argument("--nms", default=0.7, type=float, help="test nms threshold") + parser.add_argument("--tsize", default=None, type=int, help="test img size") + parser.add_argument("--seed", default=None, type=int, help="eval seed") + # tracking args + parser.add_argument("--track_thresh", type=float, default=0.4, help="tracking confidence threshold") + parser.add_argument("--track_buffer", type=int, default=30, help="the frames for keep lost tracks") + parser.add_argument("--match_thresh", type=int, default=0.9, help="matching threshold for tracking") + parser.add_argument('--min-box-area', type=float, default=100, help='filter out tiny boxes') + return parser + + +def compare_dataframes(gts, ts): + accs = [] + names = [] + for k, tsacc in ts.items(): + if k in gts: + logger.info('Comparing {}...'.format(k)) + accs.append(mm.utils.compare_to_groundtruth(gts[k], tsacc, 'iou', distth=0.5)) + names.append(k) + else: + logger.warning('No ground truth for {}, skipping.'.format(k)) + + return accs, names + + +@logger.catch +def 
main(exp, args, num_gpu): + if args.seed is not None: + random.seed(args.seed) + torch.manual_seed(args.seed) + cudnn.deterministic = True + warnings.warn( + "You have chosen to seed testing. This will turn on the CUDNN deterministic setting, " + ) + + is_distributed = num_gpu > 1 + + # set environment variables for distributed training + cudnn.benchmark = True + + rank = args.local_rank + # rank = get_local_rank() + + file_name = os.path.join(exp.output_dir, args.experiment_name) + + if rank == 0: + os.makedirs(file_name, exist_ok=True) + + results_folder = os.path.join(file_name, "track_results_sort") + os.makedirs(results_folder, exist_ok=True) + + setup_logger(file_name, distributed_rank=rank, filename="val_log.txt", mode="a") + logger.info("Args: {}".format(args)) + + if args.conf is not None: + exp.test_conf = args.conf + if args.nms is not None: + exp.nmsthre = args.nms + if args.tsize is not None: + exp.test_size = (args.tsize, args.tsize) + + model = exp.get_model() + logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size))) + #logger.info("Model Structure:\n{}".format(str(model))) + + #evaluator = exp.get_evaluator(args.batch_size, is_distributed, args.test) + + val_loader = exp.get_eval_loader(args.batch_size, is_distributed, args.test) + evaluator = MOTEvaluator( + args=args, + dataloader=val_loader, + img_size=exp.test_size, + confthre=exp.test_conf, + nmsthre=exp.nmsthre, + num_classes=exp.num_classes, + ) + + torch.cuda.set_device(rank) + model.cuda(rank) + model.eval() + + if not args.speed and not args.trt: + if args.ckpt is None: + ckpt_file = os.path.join(file_name, "best_ckpt.pth.tar") + else: + ckpt_file = args.ckpt + logger.info("loading checkpoint") + loc = "cuda:{}".format(rank) + ckpt = torch.load(ckpt_file, map_location=loc) + # load the model state dict + model.load_state_dict(ckpt["model"]) + logger.info("loaded checkpoint done.") + + if is_distributed: + model = DDP(model, device_ids=[rank]) + + if args.fuse: + logger.info("\tFusing model...") + model = fuse_model(model) + + if args.trt: + assert ( + not args.fuse and not is_distributed and args.batch_size == 1 + ), "TensorRT model is not support model fusing and distributed inferencing!" + trt_file = os.path.join(file_name, "model_trt.pth") + assert os.path.exists( + trt_file + ), "TensorRT model is not found!\n Run tools/trt.py first!" 
+ model.head.decode_in_inference = False + decoder = model.head.decode_outputs + else: + trt_file = None + decoder = None + + # start evaluate + *_, summary = evaluator.evaluate_sort( + model, is_distributed, args.fp16, trt_file, decoder, exp.test_size, results_folder + ) + logger.info("\n" + summary) + + # evaluate MOTA + mm.lap.default_solver = 'lap' + + gt_type = '_val_half' + #gt_type = '' + print('gt_type', gt_type) + gtfiles = glob.glob( + os.path.join('datasets/mot/train', '*/gt/gt{}.txt'.format(gt_type))) + print('gt_files', gtfiles) + tsfiles = [f for f in glob.glob(os.path.join(results_folder, '*.txt')) if not os.path.basename(f).startswith('eval')] + + logger.info('Found {} groundtruths and {} test files.'.format(len(gtfiles), len(tsfiles))) + logger.info('Available LAP solvers {}'.format(mm.lap.available_solvers)) + logger.info('Default LAP solver \'{}\''.format(mm.lap.default_solver)) + logger.info('Loading files.') + + gt = OrderedDict([(Path(f).parts[-3], mm.io.loadtxt(f, fmt='mot15-2D', min_confidence=1)) for f in gtfiles]) + ts = OrderedDict([(os.path.splitext(Path(f).parts[-1])[0], mm.io.loadtxt(f, fmt='mot15-2D', min_confidence=-1)) for f in tsfiles]) + + mh = mm.metrics.create() + accs, names = compare_dataframes(gt, ts) + + logger.info('Running metrics') + metrics = ['recall', 'precision', 'num_unique_objects', 'mostly_tracked', + 'partially_tracked', 'mostly_lost', 'num_false_positives', 'num_misses', + 'num_switches', 'num_fragmentations', 'mota', 'motp', 'num_objects'] + summary = mh.compute_many(accs, names=names, metrics=metrics, generate_overall=True) + # summary = mh.compute_many(accs, names=names, metrics=mm.metrics.motchallenge_metrics, generate_overall=True) + # print(mm.io.render_summary( + # summary, formatters=mh.formatters, + # namemap=mm.io.motchallenge_metric_names)) + div_dict = { + 'num_objects': ['num_false_positives', 'num_misses', 'num_switches', 'num_fragmentations'], + 'num_unique_objects': ['mostly_tracked', 'partially_tracked', 'mostly_lost']} + for divisor in div_dict: + for divided in div_dict[divisor]: + summary[divided] = (summary[divided] / summary[divisor]) + fmt = mh.formatters + change_fmt_list = ['num_false_positives', 'num_misses', 'num_switches', 'num_fragmentations', 'mostly_tracked', + 'partially_tracked', 'mostly_lost'] + for k in change_fmt_list: + fmt[k] = fmt['mota'] + print(mm.io.render_summary(summary, formatters=fmt, namemap=mm.io.motchallenge_metric_names)) + + metrics = mm.metrics.motchallenge_metrics + ['num_objects'] + summary = mh.compute_many(accs, names=names, metrics=metrics, generate_overall=True) + print(mm.io.render_summary(summary, formatters=mh.formatters, namemap=mm.io.motchallenge_metric_names)) + logger.info('Completed') + + +if __name__ == "__main__": + args = make_parser().parse_args() + exp = get_exp(args.exp_file, args.name) + exp.merge(args.opts) + + if not args.experiment_name: + args.experiment_name = exp.exp_name + + num_gpu = torch.cuda.device_count() if args.devices is None else args.devices + assert num_gpu <= torch.cuda.device_count() + + launch( + main, + num_gpu, + args.num_machines, + args.machine_rank, + backend=args.dist_backend, + dist_url=args.dist_url, + args=(exp, args, num_gpu), + ) diff --git a/tools/train.py b/tools/train.py new file mode 100644 index 0000000000000000000000000000000000000000..d05360911952b3f5a8e4c0e8dbd51dfff34f8ea8 --- /dev/null +++ b/tools/train.py @@ -0,0 +1,122 @@ +from loguru import logger + +import torch +import torch.backends.cudnn as cudnn + +from yolox.core 
import Trainer, launch +from yolox.exp import get_exp + +import argparse +import random +import warnings + + +def make_parser(): + parser = argparse.ArgumentParser("YOLOX train parser") + parser.add_argument("-expn", "--experiment-name", type=str, default=None) + parser.add_argument("-n", "--name", type=str, default=None, help="model name") + + # distributed + parser.add_argument( + "--dist-backend", default="nccl", type=str, help="distributed backend" + ) + parser.add_argument( + "--dist-url", + default=None, + type=str, + help="url used to set up distributed training", + ) + parser.add_argument("-b", "--batch-size", type=int, default=64, help="batch size") + parser.add_argument( + "-d", "--devices", default=None, type=int, help="device for training" + ) + parser.add_argument( + "--local_rank", default=0, type=int, help="local rank for dist training" + ) + parser.add_argument( + "-f", + "--exp_file", + default=None, + type=str, + help="plz input your expriment description file", + ) + parser.add_argument( + "--resume", default=False, action="store_true", help="resume training" + ) + parser.add_argument("-c", "--ckpt", default=None, type=str, help="checkpoint file") + parser.add_argument( + "-e", + "--start_epoch", + default=None, + type=int, + help="resume training start epoch", + ) + parser.add_argument( + "--num_machines", default=1, type=int, help="num of node for training" + ) + parser.add_argument( + "--machine_rank", default=0, type=int, help="node rank for multi-node training" + ) + parser.add_argument( + "--fp16", + dest="fp16", + default=True, + action="store_true", + help="Adopting mix precision training.", + ) + parser.add_argument( + "-o", + "--occupy", + dest="occupy", + default=False, + action="store_true", + help="occupy GPU memory first for training.", + ) + parser.add_argument( + "opts", + help="Modify config options using the command-line", + default=None, + nargs=argparse.REMAINDER, + ) + return parser + + +@logger.catch +def main(exp, args): + if exp.seed is not None: + random.seed(exp.seed) + torch.manual_seed(exp.seed) + cudnn.deterministic = True + warnings.warn( + "You have chosen to seed training. This will turn on the CUDNN deterministic setting, " + "which can slow down your training considerably! You may see unexpected behavior " + "when restarting from checkpoints." 
+ ) + + # set environment variables for distributed training + cudnn.benchmark = True + + trainer = Trainer(exp, args) + trainer.train() + + +if __name__ == "__main__": + args = make_parser().parse_args() + exp = get_exp(args.exp_file, args.name) + exp.merge(args.opts) + + if not args.experiment_name: + args.experiment_name = exp.exp_name + + num_gpu = torch.cuda.device_count() if args.devices is None else args.devices + assert num_gpu <= torch.cuda.device_count() + + launch( + main, + num_gpu, + args.num_machines, + args.machine_rank, + backend=args.dist_backend, + dist_url=args.dist_url, + args=(exp, args), + ) diff --git a/tools/trt.py b/tools/trt.py new file mode 100644 index 0000000000000000000000000000000000000000..f4673e9b961cb051229fad92a32641af22e05dc9 --- /dev/null +++ b/tools/trt.py @@ -0,0 +1,74 @@ +from loguru import logger + +import tensorrt as trt +import torch +from torch2trt import torch2trt + +from yolox.exp import get_exp + +import argparse +import os +import shutil + + +def make_parser(): + parser = argparse.ArgumentParser("YOLOX ncnn deploy") + parser.add_argument("-expn", "--experiment-name", type=str, default=None) + parser.add_argument("-n", "--name", type=str, default=None, help="model name") + + parser.add_argument( + "-f", + "--exp_file", + default=None, + type=str, + help="pls input your expriment description file", + ) + parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt path") + return parser + + +@logger.catch +def main(): + args = make_parser().parse_args() + exp = get_exp(args.exp_file, args.name) + if not args.experiment_name: + args.experiment_name = exp.exp_name + + model = exp.get_model() + file_name = os.path.join(exp.output_dir, args.experiment_name) + os.makedirs(file_name, exist_ok=True) + if args.ckpt is None: + ckpt_file = os.path.join(file_name, "best_ckpt.pth.tar") + else: + ckpt_file = args.ckpt + + ckpt = torch.load(ckpt_file, map_location="cpu") + # load the model state dict + + model.load_state_dict(ckpt["model"]) + logger.info("loaded checkpoint done.") + model.eval() + model.cuda() + model.head.decode_in_inference = False + x = torch.ones(1, 3, exp.test_size[0], exp.test_size[1]).cuda() + model_trt = torch2trt( + model, + [x], + fp16_mode=True, + log_level=trt.Logger.INFO, + max_workspace_size=(1 << 32), + ) + torch.save(model_trt.state_dict(), os.path.join(file_name, "model_trt.pth")) + logger.info("Converted TensorRT model done.") + engine_file = os.path.join(file_name, "model_trt.engine") + engine_file_demo = os.path.join("deploy", "TensorRT", "cpp", "model_trt.engine") + with open(engine_file, "wb") as f: + f.write(model_trt.engine.serialize()) + + shutil.copyfile(engine_file, engine_file_demo) + + logger.info("Converted TensorRT model engine file is saved for C++ inference.") + + +if __name__ == "__main__": + main() diff --git a/tools/txt2video.py b/tools/txt2video.py new file mode 100644 index 0000000000000000000000000000000000000000..23ddd8ba8ba75e4dc46114351177aabccacd1ccb --- /dev/null +++ b/tools/txt2video.py @@ -0,0 +1,211 @@ +import os +import sys +import json +import cv2 +import glob as gb +import numpy as np + + +def colormap(rgb=False): + color_list = np.array( + [ + 0.000, 0.447, 0.741, + 0.850, 0.325, 0.098, + 0.929, 0.694, 0.125, + 0.494, 0.184, 0.556, + 0.466, 0.674, 0.188, + 0.301, 0.745, 0.933, + 0.635, 0.078, 0.184, + 0.300, 0.300, 0.300, + 0.600, 0.600, 0.600, + 1.000, 0.000, 0.000, + 1.000, 0.500, 0.000, + 0.749, 0.749, 0.000, + 0.000, 1.000, 0.000, + 0.000, 0.000, 1.000, + 0.667, 0.000, 1.000, + 
0.333, 0.333, 0.000, + 0.333, 0.667, 0.000, + 0.333, 1.000, 0.000, + 0.667, 0.333, 0.000, + 0.667, 0.667, 0.000, + 0.667, 1.000, 0.000, + 1.000, 0.333, 0.000, + 1.000, 0.667, 0.000, + 1.000, 1.000, 0.000, + 0.000, 0.333, 0.500, + 0.000, 0.667, 0.500, + 0.000, 1.000, 0.500, + 0.333, 0.000, 0.500, + 0.333, 0.333, 0.500, + 0.333, 0.667, 0.500, + 0.333, 1.000, 0.500, + 0.667, 0.000, 0.500, + 0.667, 0.333, 0.500, + 0.667, 0.667, 0.500, + 0.667, 1.000, 0.500, + 1.000, 0.000, 0.500, + 1.000, 0.333, 0.500, + 1.000, 0.667, 0.500, + 1.000, 1.000, 0.500, + 0.000, 0.333, 1.000, + 0.000, 0.667, 1.000, + 0.000, 1.000, 1.000, + 0.333, 0.000, 1.000, + 0.333, 0.333, 1.000, + 0.333, 0.667, 1.000, + 0.333, 1.000, 1.000, + 0.667, 0.000, 1.000, + 0.667, 0.333, 1.000, + 0.667, 0.667, 1.000, + 0.667, 1.000, 1.000, + 1.000, 0.000, 1.000, + 1.000, 0.333, 1.000, + 1.000, 0.667, 1.000, + 0.167, 0.000, 0.000, + 0.333, 0.000, 0.000, + 0.500, 0.000, 0.000, + 0.667, 0.000, 0.000, + 0.833, 0.000, 0.000, + 1.000, 0.000, 0.000, + 0.000, 0.167, 0.000, + 0.000, 0.333, 0.000, + 0.000, 0.500, 0.000, + 0.000, 0.667, 0.000, + 0.000, 0.833, 0.000, + 0.000, 1.000, 0.000, + 0.000, 0.000, 0.167, + 0.000, 0.000, 0.333, + 0.000, 0.000, 0.500, + 0.000, 0.000, 0.667, + 0.000, 0.000, 0.833, + 0.000, 0.000, 1.000, + 0.000, 0.000, 0.000, + 0.143, 0.143, 0.143, + 0.286, 0.286, 0.286, + 0.429, 0.429, 0.429, + 0.571, 0.571, 0.571, + 0.714, 0.714, 0.714, + 0.857, 0.857, 0.857, + 1.000, 1.000, 1.000 + ] + ).astype(np.float32) + color_list = color_list.reshape((-1, 3)) * 255 + if not rgb: + color_list = color_list[:, ::-1] + return color_list + + +def txt2img(visual_path="visual_val_gt"): + print("Starting txt2img") + + valid_labels = {1} + ignore_labels = {2, 7, 8, 12} + + if not os.path.exists(visual_path): + os.makedirs(visual_path) + color_list = colormap() + + gt_json_path = 'datasets/mot/annotations/val_half.json' + img_path = 'datasets/mot/train/' + show_video_names = ['MOT17-02-FRCNN', + 'MOT17-04-FRCNN', + 'MOT17-05-FRCNN', + 'MOT17-09-FRCNN', + 'MOT17-10-FRCNN', + 'MOT17-11-FRCNN', + 'MOT17-13-FRCNN'] + + + test_json_path = 'datasets/mot/annotations/test.json' + test_img_path = 'datasets/mot/test/' + test_show_video_names = ['MOT17-01-FRCNN', + 'MOT17-03-FRCNN', + 'MOT17-06-FRCNN', + 'MOT17-07-FRCNN', + 'MOT17-08-FRCNN', + 'MOT17-12-FRCNN', + 'MOT17-14-FRCNN'] + if visual_path == "visual_test_predict": + show_video_names = test_show_video_names + img_path = test_img_path + gt_json_path = test_json_path + for show_video_name in show_video_names: + img_dict = dict() + + if visual_path == "visual_val_gt": + txt_path = 'datasets/mot/train/' + show_video_name + '/gt/gt_val_half.txt' + elif visual_path == "visual_yolox_x": + txt_path = 'YOLOX_outputs/yolox_mot_x_1088/track_results/'+ show_video_name + '.txt' + elif visual_path == "visual_test_predict": + txt_path = 'test/tracks/'+ show_video_name + '.txt' + else: + raise NotImplementedError + + with open(gt_json_path, 'r') as f: + gt_json = json.load(f) + + for ann in gt_json["images"]: + file_name = ann['file_name'] + video_name = file_name.split('/')[0] + if video_name == show_video_name: + img_dict[ann['frame_id']] = img_path + file_name + + + txt_dict = dict() + with open(txt_path, 'r') as f: + for line in f.readlines(): + linelist = line.split(',') + + mark = int(float(linelist[6])) + label = int(float(linelist[7])) + vis_ratio = float(linelist[8]) + + if visual_path == "visual_val_gt": + if mark == 0 or label not in valid_labels or label in ignore_labels or vis_ratio <= 0: + continue + 
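+                    # NOTE: assumes the MOTChallenge txt layout: frame, id, bb_left, bb_top,
+                    # bb_width, bb_height, conf/mark, class, visibility; the indices 6-8 read
+                    # above (and 0-5 below) rely on that column order.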
+ img_id = linelist[0] + obj_id = linelist[1] + bbox = [float(linelist[2]), float(linelist[3]), + float(linelist[2]) + float(linelist[4]), + float(linelist[3]) + float(linelist[5]), int(obj_id)] + if int(img_id) in txt_dict: + txt_dict[int(img_id)].append(bbox) + else: + txt_dict[int(img_id)] = list() + txt_dict[int(img_id)].append(bbox) + + for img_id in sorted(txt_dict.keys()): + img = cv2.imread(img_dict[img_id]) + for bbox in txt_dict[img_id]: + cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color_list[bbox[4]%79].tolist(), thickness=2) + cv2.putText(img, "{}".format(int(bbox[4])), (int(bbox[0]), int(bbox[1])), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color_list[bbox[4]%79].tolist(), 2) + cv2.imwrite(visual_path + "/" + show_video_name + "{:0>6d}.png".format(img_id), img) + print(show_video_name, "Done") + print("txt2img Done") + + +def img2video(visual_path="visual_val_gt"): + print("Starting img2video") + + img_paths = gb.glob(visual_path + "/*.png") + fps = 16 + size = (1920,1080) + videowriter = cv2.VideoWriter(visual_path + "_video.avi",cv2.VideoWriter_fourcc('M','J','P','G'), fps, size) + + for img_path in sorted(img_paths): + img = cv2.imread(img_path) + img = cv2.resize(img, size) + videowriter.write(img) + + videowriter.release() + print("img2video Done") + + +if __name__ == '__main__': + visual_path="visual_yolox_x" + if len(sys.argv) > 1: + visual_path =sys.argv[1] + txt2img(visual_path) + #img2video(visual_path) diff --git a/tutorials/.DS_Store b/tutorials/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..2793154069f164b3e2d102f1d12cf4d2c444e407 Binary files /dev/null and b/tutorials/.DS_Store differ diff --git a/tutorials/centertrack/README.md b/tutorials/centertrack/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b46bb2f0412c260c53d90bb5f8e5f2c387f748a5 --- /dev/null +++ b/tutorials/centertrack/README.md @@ -0,0 +1,42 @@ +# CenterTrack + +Step1. git clone https://github.com/xingyizhou/CenterTrack.git + + +Step2. + +replace https://github.com/xingyizhou/CenterTrack/blob/master/src/lib/utils/tracker.py + +replace https://github.com/xingyizhou/CenterTrack/blob/master/src/lib/opts.py + + +Step3. run +``` +python3 test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --load_model ../models/mot17_half.pth --track_thresh 0.4 --new_thresh 0.5 --out_thresh 0.2 --pre_thresh 0.5 +``` + + +# CenterTrack_BYTE + +Step1. git clone https://github.com/xingyizhou/CenterTrack.git + + +Step2. + +replace https://github.com/xingyizhou/CenterTrack/blob/master/src/lib/utils/tracker.py by byte_tracker.py + +replace https://github.com/xingyizhou/CenterTrack/blob/master/src/lib/opts.py + +add mot_online to https://github.com/xingyizhou/CenterTrack/blob/master/src/lib/utils + +Step3. 
run +``` +python3 test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --load_model ../models/mot17_half.pth --track_thresh 0.4 --new_thresh 0.5 --out_thresh 0.2 --pre_thresh 0.5 +``` + + +## Notes +tracker.py: only motion + +byte_tracker.py: motion with kalman filter + diff --git a/tutorials/centertrack/byte_tracker.py b/tutorials/centertrack/byte_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..8cb757e0f1e62f3ec4f2e9ab57cef2b509298dbc --- /dev/null +++ b/tutorials/centertrack/byte_tracker.py @@ -0,0 +1,363 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +from sklearn.utils.linear_assignment_ import linear_assignment +import copy +from .mot_online.kalman_filter import KalmanFilter +from .mot_online.basetrack import BaseTrack, TrackState +from .mot_online import matching + + +class STrack(BaseTrack): + shared_kalman = KalmanFilter() + def __init__(self, tlwh, score): + + # wait activate + self._tlwh = np.asarray(tlwh, dtype=np.float) + self.kalman_filter = None + self.mean, self.covariance = None, None + self.is_activated = False + + self.score = score + self.tracklet_len = 0 + + def predict(self): + mean_state = self.mean.copy() + if self.state != TrackState.Tracked: + mean_state[7] = 0 + self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) + + @staticmethod + def multi_predict(stracks): + if len(stracks) > 0: + multi_mean = np.asarray([st.mean.copy() for st in stracks]) + multi_covariance = np.asarray([st.covariance for st in stracks]) + for i, st in enumerate(stracks): + if st.state != TrackState.Tracked: + multi_mean[i][7] = 0 + multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance) + for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): + stracks[i].mean = mean + stracks[i].covariance = cov + + def activate(self, kalman_filter, frame_id): + """Start a new tracklet""" + self.kalman_filter = kalman_filter + self.track_id = self.next_id() + self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) + + self.tracklet_len = 0 + self.state = TrackState.Tracked + if frame_id == 1: + self.is_activated = True + # self.is_activated = True + self.frame_id = frame_id + self.start_frame = frame_id + + def re_activate(self, new_track, frame_id, new_id=False): + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) + ) + self.tracklet_len = 0 + self.state = TrackState.Tracked + self.is_activated = True + self.frame_id = frame_id + if new_id: + self.track_id = self.next_id() + self.score = new_track.score + + def update(self, new_track, frame_id): + """ + Update a matched track + :type new_track: STrack + :type frame_id: int + :type update_feature: bool + :return: + """ + self.frame_id = frame_id + self.tracklet_len += 1 + + new_tlwh = new_track.tlwh + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) + self.state = TrackState.Tracked + self.is_activated = True + + self.score = new_track.score + + @property + # @jit(nopython=True) + def tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. 
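+        When a Kalman mean is available the box is derived from the filtered
+        state `(center x, center y, aspect ratio, height)`; otherwise the
+        detection box stored at construction time is returned.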
+ """ + if self.mean is None: + return self._tlwh.copy() + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + @property + # @jit(nopython=True) + def tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_xyah(tlwh): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. + """ + ret = np.asarray(tlwh).copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + def to_xyah(self): + return self.tlwh_to_xyah(self.tlwh) + + @staticmethod + # @jit(nopython=True) + def tlbr_to_tlwh(tlbr): + ret = np.asarray(tlbr).copy() + ret[2:] -= ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_tlbr(tlwh): + ret = np.asarray(tlwh).copy() + ret[2:] += ret[:2] + return ret + + def __repr__(self): + return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) + + + +class BYTETracker(object): + def __init__(self, args, frame_rate=30): + self.args = args + self.det_thresh = args.new_thresh + self.buffer_size = int(frame_rate / 30.0 * args.track_buffer) + self.max_time_lost = self.buffer_size + self.reset() + + # below has no effect to final output, just to be compatible to codebase + def init_track(self, results): + for item in results: + if item['score'] > self.opt.new_thresh and item['class'] == 1: + self.id_count += 1 + item['active'] = 1 + item['age'] = 1 + item['tracking_id'] = self.id_count + if not ('ct' in item): + bbox = item['bbox'] + item['ct'] = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] + self.tracks.append(item) + + def reset(self): + self.frame_id = 0 + self.kalman_filter = KalmanFilter() + self.tracked_stracks = [] # type: list[STrack] + self.lost_stracks = [] # type: list[STrack] + self.removed_stracks = [] # type: list[STrack] + self.tracks = [] + + # below has no effect to final output, just to be compatible to codebase + self.id_count = 0 + + def step(self, results, public_det=None): + self.frame_id += 1 + activated_starcks = [] + refind_stracks = [] + lost_stracks = [] + removed_stracks = [] + detections = [] + detections_second = [] + + scores = np.array([item['score'] for item in results if item['class'] == 1], np.float32) + bboxes = np.vstack([item['bbox'] for item in results if item['class'] == 1]) # N x 4, x1y1x2y2 + + remain_inds = scores >= self.args.track_thresh + dets = bboxes[remain_inds] + scores_keep = scores[remain_inds] + + + inds_low = scores > self.args.out_thresh + inds_high = scores < self.args.track_thresh + inds_second = np.logical_and(inds_low, inds_high) + dets_second = bboxes[inds_second] + scores_second = scores[inds_second] + + if len(dets) > 0: + '''Detections''' + detections = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for + (tlbr, s) in zip(dets, scores_keep)] + else: + detections = [] + + ''' Add newly detected tracklets to tracked_stracks''' + unconfirmed = [] + tracked_stracks = [] # type: list[STrack] + for track in self.tracked_stracks: + if not track.is_activated: + unconfirmed.append(track) + else: + tracked_stracks.append(track) + + ''' Step 2: First association, with Kalman and IOU''' + strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) + # Predict the current location with KF + STrack.multi_predict(strack_pool) + dists = matching.iou_distance(strack_pool, detections) + #dists = 
matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.args.match_thresh) + + for itracked, idet in matches: + track = strack_pool[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(detections[idet], self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + ''' Step 3: Second association, association the untrack to the low score detections, with IOU''' + if len(dets_second) > 0: + '''Detections''' + detections_second = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for + (tlbr, s) in zip(dets_second, scores_second)] + else: + detections_second = [] + r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] + dists = matching.iou_distance(r_tracked_stracks, detections_second) + matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5) + for itracked, idet in matches: + track = r_tracked_stracks[itracked] + det = detections_second[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + for it in u_track: + track = r_tracked_stracks[it] + if not track.state == TrackState.Lost: + track.mark_lost() + lost_stracks.append(track) + + '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' + detections = [detections[i] for i in u_detection] + dists = matching.iou_distance(unconfirmed, detections) + matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) + for itracked, idet in matches: + unconfirmed[itracked].update(detections[idet], self.frame_id) + activated_starcks.append(unconfirmed[itracked]) + for it in u_unconfirmed: + track = unconfirmed[it] + track.mark_removed() + removed_stracks.append(track) + + """ Step 4: Init new stracks""" + for inew in u_detection: + track = detections[inew] + if track.score < self.det_thresh: + continue + track.activate(self.kalman_filter, self.frame_id) + activated_starcks.append(track) + """ Step 5: Update state""" + for track in self.lost_stracks: + if self.frame_id - track.end_frame > self.max_time_lost: + track.mark_removed() + removed_stracks.append(track) + + self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] + self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) + self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) + self.lost_stracks.extend(lost_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) + self.removed_stracks.extend(removed_stracks) + self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) + output_stracks = [track for track in self.tracked_stracks if track.is_activated] + + ret = [] + for track in output_stracks: + track_dict = {} + track_dict['score'] = track.score + track_dict['bbox'] = track.tlbr + bbox = track_dict['bbox'] + track_dict['ct'] = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] + track_dict['active'] = 1 if track.is_activated else 0 + track_dict['tracking_id'] = track.track_id + track_dict['class'] = 1 + ret.append(track_dict) + + self.tracks = ret + return ret + + +def joint_stracks(tlista, 
tlistb): + exists = {} + res = [] + for t in tlista: + exists[t.track_id] = 1 + res.append(t) + for t in tlistb: + tid = t.track_id + if not exists.get(tid, 0): + exists[tid] = 1 + res.append(t) + return res + + +def sub_stracks(tlista, tlistb): + stracks = {} + for t in tlista: + stracks[t.track_id] = t + for t in tlistb: + tid = t.track_id + if stracks.get(tid, 0): + del stracks[tid] + return list(stracks.values()) + + +def remove_duplicate_stracks(stracksa, stracksb): + pdist = matching.iou_distance(stracksa, stracksb) + pairs = np.where(pdist < 0.15) + dupa, dupb = list(), list() + for p, q in zip(*pairs): + timep = stracksa[p].frame_id - stracksa[p].start_frame + timeq = stracksb[q].frame_id - stracksb[q].start_frame + if timep > timeq: + dupb.append(q) + else: + dupa.append(p) + resa = [t for i, t in enumerate(stracksa) if not i in dupa] + resb = [t for i, t in enumerate(stracksb) if not i in dupb] + return resa, resb + + +def remove_fp_stracks(stracksa, n_frame=10): + remain = [] + for t in stracksa: + score_5 = t.score_list[-n_frame:] + score_5 = np.array(score_5, dtype=np.float32) + index = score_5 < 0.45 + num = np.sum(index) + if num < n_frame: + remain.append(t) + return remain + diff --git a/tutorials/centertrack/mot_online/basetrack.py b/tutorials/centertrack/mot_online/basetrack.py new file mode 100644 index 0000000000000000000000000000000000000000..4fe2233607f6d4ed28b11a0ae6c0303c8ca19098 --- /dev/null +++ b/tutorials/centertrack/mot_online/basetrack.py @@ -0,0 +1,52 @@ +import numpy as np +from collections import OrderedDict + + +class TrackState(object): + New = 0 + Tracked = 1 + Lost = 2 + Removed = 3 + + +class BaseTrack(object): + _count = 0 + + track_id = 0 + is_activated = False + state = TrackState.New + + history = OrderedDict() + features = [] + curr_feature = None + score = 0 + start_frame = 0 + frame_id = 0 + time_since_update = 0 + + # multi-camera + location = (np.inf, np.inf) + + @property + def end_frame(self): + return self.frame_id + + @staticmethod + def next_id(): + BaseTrack._count += 1 + return BaseTrack._count + + def activate(self, *args): + raise NotImplementedError + + def predict(self): + raise NotImplementedError + + def update(self, *args, **kwargs): + raise NotImplementedError + + def mark_lost(self): + self.state = TrackState.Lost + + def mark_removed(self): + self.state = TrackState.Removed diff --git a/tutorials/centertrack/mot_online/kalman_filter.py b/tutorials/centertrack/mot_online/kalman_filter.py new file mode 100644 index 0000000000000000000000000000000000000000..b4c4e9854d8abd2fea75ad6b1fe8cd6846c43680 --- /dev/null +++ b/tutorials/centertrack/mot_online/kalman_filter.py @@ -0,0 +1,269 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np +import scipy.linalg + +""" +Table for the 0.95 quantile of the chi-square distribution with N degrees of +freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv +function and used as Mahalanobis gating threshold. +""" +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919} + + +class KalmanFilter(object): + """ + A simple Kalman filter for tracking bounding boxes in image space. + + The 8-dimensional state space + + x, y, a, h, vx, vy, va, vh + + contains the bounding box center position (x, y), aspect ratio a, height h, + and their respective velocities. + + Object motion follows a constant velocity model. 
The bounding box location + (x, y, a, h) is taken as direct observation of the state space (linear + observation model). + + """ + + def __init__(self): + ndim, dt = 4, 1. + + # Create Kalman filter model matrices. + self._motion_mat = np.eye(2 * ndim, 2 * ndim) + for i in range(ndim): + self._motion_mat[i, ndim + i] = dt + self._update_mat = np.eye(ndim, 2 * ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. These weights control the amount of uncertainty in + # the model. This is a bit hacky. + self._std_weight_position = 1. / 20 + self._std_weight_velocity = 1. / 160 + + def initiate(self, measurement): + """Create track from unassociated measurement. + + Parameters + ---------- + measurement : ndarray + Bounding box coordinates (x, y, a, h) with center position (x, y), + aspect ratio a, and height h. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are initialized + to 0 mean. + + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + mean = np.r_[mean_pos, mean_vel] + + std = [ + 2 * self._std_weight_position * measurement[3], + 2 * self._std_weight_position * measurement[3], + 1e-2, + 2 * self._std_weight_position * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 1e-5, + 10 * self._std_weight_velocity * measurement[3]] + covariance = np.diag(np.square(std)) + return mean, covariance + + def predict(self, mean, covariance): + """Run Kalman filter prediction step. + + Parameters + ---------- + mean : ndarray + The 8 dimensional mean vector of the object state at the previous + time step. + covariance : ndarray + The 8x8 dimensional covariance matrix of the object state at the + previous time step. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + + """ + std_pos = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-2, + self._std_weight_position * mean[3]] + std_vel = [ + self._std_weight_velocity * mean[3], + self._std_weight_velocity * mean[3], + 1e-5, + self._std_weight_velocity * mean[3]] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + #mean = np.dot(self._motion_mat, mean) + mean = np.dot(mean, self._motion_mat.T) + covariance = np.linalg.multi_dot(( + self._motion_mat, covariance, self._motion_mat.T)) + motion_cov + + return mean, covariance + + def project(self, mean, covariance): + """Project state distribution to measurement space. + + Parameters + ---------- + mean : ndarray + The state's mean vector (8 dimensional array). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + + Returns + ------- + (ndarray, ndarray) + Returns the projected mean and covariance matrix of the given state + estimate. + + """ + std = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-1, + self._std_weight_position * mean[3]] + innovation_cov = np.diag(np.square(std)) + + mean = np.dot(self._update_mat, mean) + covariance = np.linalg.multi_dot(( + self._update_mat, covariance, self._update_mat.T)) + return mean, covariance + innovation_cov + + def multi_predict(self, mean, covariance): + """Run Kalman filter prediction step (Vectorized version). 
+ Parameters + ---------- + mean : ndarray + The Nx8 dimensional mean matrix of the object states at the previous + time step. + covariance : ndarray + The Nx8x8 dimensional covariance matrics of the object states at the + previous time step. + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + """ + std_pos = [ + self._std_weight_position * mean[:, 3], + self._std_weight_position * mean[:, 3], + 1e-2 * np.ones_like(mean[:, 3]), + self._std_weight_position * mean[:, 3]] + std_vel = [ + self._std_weight_velocity * mean[:, 3], + self._std_weight_velocity * mean[:, 3], + 1e-5 * np.ones_like(mean[:, 3]), + self._std_weight_velocity * mean[:, 3]] + sqr = np.square(np.r_[std_pos, std_vel]).T + + motion_cov = [] + for i in range(len(mean)): + motion_cov.append(np.diag(sqr[i])) + motion_cov = np.asarray(motion_cov) + + mean = np.dot(mean, self._motion_mat.T) + left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) + covariance = np.dot(left, self._motion_mat.T) + motion_cov + + return mean, covariance + + def update(self, mean, covariance, measurement): + """Run Kalman filter correction step. + + Parameters + ---------- + mean : ndarray + The predicted state's mean vector (8 dimensional). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + measurement : ndarray + The 4 dimensional measurement vector (x, y, a, h), where (x, y) + is the center position, a the aspect ratio, and h the height of the + bounding box. + + Returns + ------- + (ndarray, ndarray) + Returns the measurement-corrected state distribution. + + """ + projected_mean, projected_cov = self.project(mean, covariance) + + chol_factor, lower = scipy.linalg.cho_factor( + projected_cov, lower=True, check_finite=False) + kalman_gain = scipy.linalg.cho_solve( + (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, + check_finite=False).T + innovation = measurement - projected_mean + + new_mean = mean + np.dot(innovation, kalman_gain.T) + new_covariance = covariance - np.linalg.multi_dot(( + kalman_gain, projected_cov, kalman_gain.T)) + return new_mean, new_covariance + + def gating_distance(self, mean, covariance, measurements, + only_position=False, metric='maha'): + """Compute gating distance between state distribution and measurements. + A suitable distance threshold can be obtained from `chi2inv95`. If + `only_position` is False, the chi-square distribution has 4 degrees of + freedom, otherwise 2. + Parameters + ---------- + mean : ndarray + Mean vector over the state distribution (8 dimensional). + covariance : ndarray + Covariance of the state distribution (8x8 dimensional). + measurements : ndarray + An Nx4 dimensional matrix of N measurements, each in + format (x, y, a, h) where (x, y) is the bounding box center + position, a the aspect ratio, and h the height. + only_position : Optional[bool] + If True, distance computation is done with respect to the bounding + box center position only. + Returns + ------- + ndarray + Returns an array of length N, where the i-th element contains the + squared Mahalanobis distance between (mean, covariance) and + `measurements[i]`. 
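+        The `metric` argument selects the distance type: 'gaussian' gives the
+        squared Euclidean distance, while 'maha' (the default) gives the
+        squared Mahalanobis distance.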
+ """ + mean, covariance = self.project(mean, covariance) + if only_position: + mean, covariance = mean[:2], covariance[:2, :2] + measurements = measurements[:, :2] + + d = measurements - mean + if metric == 'gaussian': + return np.sum(d * d, axis=1) + elif metric == 'maha': + cholesky_factor = np.linalg.cholesky(covariance) + z = scipy.linalg.solve_triangular( + cholesky_factor, d.T, lower=True, check_finite=False, + overwrite_b=True) + squared_maha = np.sum(z * z, axis=0) + return squared_maha + else: + raise ValueError('invalid distance metric') diff --git a/tutorials/centertrack/mot_online/matching.py b/tutorials/centertrack/mot_online/matching.py new file mode 100644 index 0000000000000000000000000000000000000000..54cb4be09624cdb68581508bdbdeecdc63539b7c --- /dev/null +++ b/tutorials/centertrack/mot_online/matching.py @@ -0,0 +1,198 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import lap +import numpy as np +import scipy +from cython_bbox import bbox_overlaps as bbox_ious +from scipy.spatial.distance import cdist + +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919} + +def merge_matches(m1, m2, shape): + O,P,Q = shape + m1 = np.asarray(m1) + m2 = np.asarray(m2) + + M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) + M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) + + mask = M1*M2 + match = mask.nonzero() + match = list(zip(match[0], match[1])) + unmatched_O = tuple(set(range(O)) - set([i for i, j in match])) + unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match])) + + return match, unmatched_O, unmatched_Q + + +def _indices_to_matches(cost_matrix, indices, thresh): + matched_cost = cost_matrix[tuple(zip(*indices))] + matched_mask = (matched_cost <= thresh) + + matches = indices[matched_mask] + unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0])) + unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1])) + + return matches, unmatched_a, unmatched_b + + +def linear_assignment(cost_matrix, thresh): + if cost_matrix.size == 0: + return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) + matches, unmatched_a, unmatched_b = [], [], [] + cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) + for ix, mx in enumerate(x): + if mx >= 0: + matches.append([ix, mx]) + unmatched_a = np.where(x < 0)[0] + unmatched_b = np.where(y < 0)[0] + matches = np.asarray(matches) + return matches, unmatched_a, unmatched_b + + +def ious(atlbrs, btlbrs): + """ + Compute cost based on IoU + :type atlbrs: list[tlbr] | np.ndarray + :type atlbrs: list[tlbr] | np.ndarray + + :rtype ious np.ndarray + """ + ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) + if ious.size == 0: + return ious + + ious = bbox_ious( + np.ascontiguousarray(atlbrs, dtype=np.float), + np.ascontiguousarray(btlbrs, dtype=np.float) + ) + + return ious + + +def iou_distance(atracks, btracks): + """ + Compute cost based on IoU + :type atracks: list[STrack] + :type btracks: list[STrack] + + :rtype cost_matrix np.ndarray + """ + + if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): + atlbrs = atracks + btlbrs = btracks + else: + atlbrs = [track.tlbr for track in atracks] + btlbrs = [track.tlbr for track in btracks] + _ious = 
ious(atlbrs, btlbrs) + cost_matrix = 1 - _ious + + return cost_matrix + +def embedding_distance(tracks, detections, metric='cosine'): + """ + :param tracks: list[STrack] + :param detections: list[BaseTrack] + :param metric: + :return: cost_matrix np.ndarray + """ + + cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) + if cost_matrix.size == 0: + return cost_matrix + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + #for i, track in enumerate(tracks): + #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + return cost_matrix + +def embedding_distance2(tracks, detections, metric='cosine'): + """ + :param tracks: list[STrack] + :param detections: list[BaseTrack] + :param metric: + :return: cost_matrix np.ndarray + """ + + cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) + if cost_matrix.size == 0: + return cost_matrix + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + #for i, track in enumerate(tracks): + #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + track_features = np.asarray([track.features[0] for track in tracks], dtype=np.float) + cost_matrix2 = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + track_features = np.asarray([track.features[len(track.features)-1] for track in tracks], dtype=np.float) + cost_matrix3 = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + for row in range(len(cost_matrix)): + cost_matrix[row] = (cost_matrix[row]+cost_matrix2[row]+cost_matrix3[row])/3 + return cost_matrix + + +def vis_id_feature_A_distance(tracks, detections, metric='cosine'): + track_features = [] + det_features = [] + leg1 = len(tracks) + leg2 = len(detections) + cost_matrix = np.zeros((leg1, leg2), dtype=np.float) + cost_matrix_det = np.zeros((leg1, leg2), dtype=np.float) + cost_matrix_track = np.zeros((leg1, leg2), dtype=np.float) + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + if leg2 != 0: + cost_matrix_det = np.maximum(0.0, cdist(det_features, det_features, metric)) + if leg1 != 0: + cost_matrix_track = np.maximum(0.0, cdist(track_features, track_features, metric)) + if cost_matrix.size == 0: + return track_features, det_features, cost_matrix, cost_matrix_det, cost_matrix_track + cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) + if leg1 > 10: + leg1 = 10 + tracks = tracks[:10] + if leg2 > 10: + leg2 = 10 + detections = detections[:10] + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + return track_features, det_features, cost_matrix, cost_matrix_det, cost_matrix_track + +def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = 
chi2inv95[gating_dim] + measurements = np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position) + cost_matrix[row, gating_distance > gating_threshold] = np.inf + return cost_matrix + + +def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = chi2inv95[gating_dim] + measurements = np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position, metric='maha') + cost_matrix[row, gating_distance > gating_threshold] = np.inf + cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance + return cost_matrix diff --git a/tutorials/centertrack/opts.py b/tutorials/centertrack/opts.py new file mode 100644 index 0000000000000000000000000000000000000000..5d54fe39ff696933e1391c531868f8b73865b690 --- /dev/null +++ b/tutorials/centertrack/opts.py @@ -0,0 +1,406 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import os +import sys + +class opts(object): + def __init__(self): + self.parser = argparse.ArgumentParser() + # basic experiment setting + self.parser.add_argument('task', default='', + help='ctdet | ddd | multi_pose ' + '| tracking or combined with ,') + self.parser.add_argument('--dataset', default='coco', + help='see lib/dataset/dataset_facotry for ' + + 'available datasets') + self.parser.add_argument('--test_dataset', default='', + help='coco | kitti | coco_hp | pascal') + self.parser.add_argument('--exp_id', default='default') + self.parser.add_argument('--test', action='store_true') + self.parser.add_argument('--debug', type=int, default=0, + help='level of visualization.' + '1: only show the final detection results' + '2: show the network output features' + '3: use matplot to display' # useful when lunching training with ipython notebook + '4: save all visualizations to disk') + self.parser.add_argument('--no_pause', action='store_true') + self.parser.add_argument('--demo', default='', + help='path to image/ image folders/ video. ' + 'or "webcam"') + self.parser.add_argument('--load_model', default='', + help='path to pretrained model') + self.parser.add_argument('--resume', action='store_true', + help='resume an experiment. ' + 'Reloaded the optimizer parameter and ' + 'set load_model to model_last.pth ' + 'in the exp dir if load_model is empty.') + + # system + self.parser.add_argument('--gpus', default='0', + help='-1 for CPU, use comma for multiple gpus') + self.parser.add_argument('--num_workers', type=int, default=4, + help='dataloader threads. 
0 for single-thread.') + self.parser.add_argument('--not_cuda_benchmark', action='store_true', + help='disable when the input size is not fixed.') + self.parser.add_argument('--seed', type=int, default=317, + help='random seed') # from CornerNet + self.parser.add_argument('--not_set_cuda_env', action='store_true', + help='used when training in slurm clusters.') + + # log + self.parser.add_argument('--print_iter', type=int, default=0, + help='disable progress bar and print to screen.') + self.parser.add_argument('--save_all', action='store_true', + help='save model to disk every 5 epochs.') + self.parser.add_argument('--vis_thresh', type=float, default=0.3, + help='visualization threshold.') + self.parser.add_argument('--debugger_theme', default='white', + choices=['white', 'black']) + self.parser.add_argument('--eval_val', action='store_true') + self.parser.add_argument('--save_imgs', default='', help='') + self.parser.add_argument('--save_img_suffix', default='', help='') + self.parser.add_argument('--skip_first', type=int, default=-1, help='') + self.parser.add_argument('--save_video', action='store_true') + self.parser.add_argument('--save_framerate', type=int, default=30) + self.parser.add_argument('--resize_video', action='store_true') + self.parser.add_argument('--video_h', type=int, default=512, help='') + self.parser.add_argument('--video_w', type=int, default=512, help='') + self.parser.add_argument('--transpose_video', action='store_true') + self.parser.add_argument('--show_track_color', action='store_true') + self.parser.add_argument('--not_show_bbox', action='store_true') + self.parser.add_argument('--not_show_number', action='store_true') + self.parser.add_argument('--not_show_txt', action='store_true') + self.parser.add_argument('--qualitative', action='store_true') + self.parser.add_argument('--tango_color', action='store_true') + self.parser.add_argument('--only_show_dots', action='store_true') + self.parser.add_argument('--show_trace', action='store_true') + + # model + self.parser.add_argument('--arch', default='dla_34', + help='model architecture. Currently tested' + 'res_18 | res_101 | resdcn_18 | resdcn_101 |' + 'dlav0_34 | dla_34 | hourglass') + self.parser.add_argument('--dla_node', default='dcn') + self.parser.add_argument('--head_conv', type=int, default=-1, + help='conv layer channels for output head' + '0 for no conv layer' + '-1 for default setting: ' + '64 for resnets and 256 for dla.') + self.parser.add_argument('--num_head_conv', type=int, default=1) + self.parser.add_argument('--head_kernel', type=int, default=3, help='') + self.parser.add_argument('--down_ratio', type=int, default=4, + help='output stride. Currently only supports 4.') + self.parser.add_argument('--not_idaup', action='store_true') + self.parser.add_argument('--num_classes', type=int, default=-1) + self.parser.add_argument('--num_layers', type=int, default=101) + self.parser.add_argument('--backbone', default='dla34') + self.parser.add_argument('--neck', default='dlaup') + self.parser.add_argument('--msra_outchannel', type=int, default=256) + self.parser.add_argument('--efficient_level', type=int, default=0) + self.parser.add_argument('--prior_bias', type=float, default=-4.6) # -2.19 + + # input + self.parser.add_argument('--input_res', type=int, default=-1, + help='input height and width. -1 for default from ' + 'dataset. Will be overriden by input_h | input_w') + self.parser.add_argument('--input_h', type=int, default=-1, + help='input height. 
-1 for default from dataset.') + self.parser.add_argument('--input_w', type=int, default=-1, + help='input width. -1 for default from dataset.') + self.parser.add_argument('--dataset_version', default='') + + # train + self.parser.add_argument('--optim', default='adam') + self.parser.add_argument('--lr', type=float, default=1.25e-4, + help='learning rate for batch size 32.') + self.parser.add_argument('--lr_step', type=str, default='60', + help='drop learning rate by 10.') + self.parser.add_argument('--save_point', type=str, default='90', + help='when to save the model to disk.') + self.parser.add_argument('--num_epochs', type=int, default=70, + help='total training epochs.') + self.parser.add_argument('--batch_size', type=int, default=32, + help='batch size') + self.parser.add_argument('--master_batch_size', type=int, default=-1, + help='batch size on the master gpu.') + self.parser.add_argument('--num_iters', type=int, default=-1, + help='default: #samples / batch_size.') + self.parser.add_argument('--val_intervals', type=int, default=10000, + help='number of epochs to run validation.') + self.parser.add_argument('--trainval', action='store_true', + help='include validation in training and ' + 'test on test set') + self.parser.add_argument('--ltrb', action='store_true', + help='') + self.parser.add_argument('--ltrb_weight', type=float, default=0.1, + help='') + self.parser.add_argument('--reset_hm', action='store_true') + self.parser.add_argument('--reuse_hm', action='store_true') + self.parser.add_argument('--use_kpt_center', action='store_true') + self.parser.add_argument('--add_05', action='store_true') + self.parser.add_argument('--dense_reg', type=int, default=1, help='') + + # test + self.parser.add_argument('--flip_test', action='store_true', + help='flip data augmentation.') + self.parser.add_argument('--test_scales', type=str, default='1', + help='multi scale test augmentation.') + self.parser.add_argument('--nms', action='store_true', + help='run nms in testing.') + self.parser.add_argument('--K', type=int, default=100, + help='max number of output objects.') + self.parser.add_argument('--not_prefetch_test', action='store_true', + help='not use parallal data pre-processing.') + self.parser.add_argument('--fix_short', type=int, default=-1) + self.parser.add_argument('--keep_res', action='store_true', + help='keep the original resolution' + ' during validation.') + self.parser.add_argument('--map_argoverse_id', action='store_true', + help='if trained on nuscenes and eval on kitti') + self.parser.add_argument('--out_thresh', type=float, default=-1, + help='') + self.parser.add_argument('--depth_scale', type=float, default=1, + help='') + self.parser.add_argument('--save_results', action='store_true') + self.parser.add_argument('--load_results', default='') + self.parser.add_argument('--use_loaded_results', action='store_true') + self.parser.add_argument('--ignore_loaded_cats', default='') + self.parser.add_argument('--model_output_list', action='store_true', + help='Used when convert to onnx') + self.parser.add_argument('--non_block_test', action='store_true') + self.parser.add_argument('--vis_gt_bev', default='', help='') + self.parser.add_argument('--kitti_split', default='3dop', + help='different validation split for kitti: ' + '3dop | subcnn') + self.parser.add_argument('--test_focal_length', type=int, default=-1) + + # dataset + self.parser.add_argument('--not_rand_crop', action='store_true', + help='not use the random crop data augmentation' + 'from CornerNet.') + 
self.parser.add_argument('--not_max_crop', action='store_true', + help='used when the training dataset has' + 'inbalanced aspect ratios.') + self.parser.add_argument('--shift', type=float, default=0, + help='when not using random crop, 0.1' + 'apply shift augmentation.') + self.parser.add_argument('--scale', type=float, default=0, + help='when not using random crop, 0.4' + 'apply scale augmentation.') + self.parser.add_argument('--aug_rot', type=float, default=0, + help='probability of applying ' + 'rotation augmentation.') + self.parser.add_argument('--rotate', type=float, default=0, + help='when not using random crop' + 'apply rotation augmentation.') + self.parser.add_argument('--flip', type=float, default=0.5, + help='probability of applying flip augmentation.') + self.parser.add_argument('--no_color_aug', action='store_true', + help='not use the color augmenation ' + 'from CornerNet') + + # Tracking + self.parser.add_argument('--tracking', action='store_true') + self.parser.add_argument('--pre_hm', action='store_true') + self.parser.add_argument('--same_aug_pre', action='store_true') + self.parser.add_argument('--zero_pre_hm', action='store_true') + self.parser.add_argument('--hm_disturb', type=float, default=0) + self.parser.add_argument('--lost_disturb', type=float, default=0) + self.parser.add_argument('--fp_disturb', type=float, default=0) + self.parser.add_argument('--pre_thresh', type=float, default=-1) + self.parser.add_argument('--track_thresh', type=float, default=0.3) + self.parser.add_argument('--match_thresh', type=float, default=0.8) + self.parser.add_argument('--track_buffer', type=int, default=30) + self.parser.add_argument('--new_thresh', type=float, default=0.3) + self.parser.add_argument('--max_frame_dist', type=int, default=3) + self.parser.add_argument('--ltrb_amodal', action='store_true') + self.parser.add_argument('--ltrb_amodal_weight', type=float, default=0.1) + self.parser.add_argument('--public_det', action='store_true') + self.parser.add_argument('--no_pre_img', action='store_true') + self.parser.add_argument('--zero_tracking', action='store_true') + self.parser.add_argument('--hungarian', action='store_true') + self.parser.add_argument('--max_age', type=int, default=-1) + + + # loss + self.parser.add_argument('--tracking_weight', type=float, default=1) + self.parser.add_argument('--reg_loss', default='l1', + help='regression loss: sl1 | l1 | l2') + self.parser.add_argument('--hm_weight', type=float, default=1, + help='loss weight for keypoint heatmaps.') + self.parser.add_argument('--off_weight', type=float, default=1, + help='loss weight for keypoint local offsets.') + self.parser.add_argument('--wh_weight', type=float, default=0.1, + help='loss weight for bounding box size.') + self.parser.add_argument('--hp_weight', type=float, default=1, + help='loss weight for human pose offset.') + self.parser.add_argument('--hm_hp_weight', type=float, default=1, + help='loss weight for human keypoint heatmap.') + self.parser.add_argument('--amodel_offset_weight', type=float, default=1, + help='Please forgive the typo.') + self.parser.add_argument('--dep_weight', type=float, default=1, + help='loss weight for depth.') + self.parser.add_argument('--dim_weight', type=float, default=1, + help='loss weight for 3d bounding box size.') + self.parser.add_argument('--rot_weight', type=float, default=1, + help='loss weight for orientation.') + self.parser.add_argument('--nuscenes_att', action='store_true') + self.parser.add_argument('--nuscenes_att_weight', type=float, 
default=1) + self.parser.add_argument('--velocity', action='store_true') + self.parser.add_argument('--velocity_weight', type=float, default=1) + + # custom dataset + self.parser.add_argument('--custom_dataset_img_path', default='') + self.parser.add_argument('--custom_dataset_ann_path', default='') + self.parser.add_argument('--bird_view_world_size', type=int, default=64) + + def parse(self, args=''): + if args == '': + opt = self.parser.parse_args() + else: + opt = self.parser.parse_args(args) + + if opt.test_dataset == '': + opt.test_dataset = opt.dataset + + opt.gpus_str = opt.gpus + opt.gpus = [int(gpu) for gpu in opt.gpus.split(',')] + opt.gpus = [i for i in range(len(opt.gpus))] if opt.gpus[0] >=0 else [-1] + opt.lr_step = [int(i) for i in opt.lr_step.split(',')] + opt.save_point = [int(i) for i in opt.save_point.split(',')] + opt.test_scales = [float(i) for i in opt.test_scales.split(',')] + opt.save_imgs = [i for i in opt.save_imgs.split(',')] \ + if opt.save_imgs != '' else [] + opt.ignore_loaded_cats = \ + [int(i) for i in opt.ignore_loaded_cats.split(',')] \ + if opt.ignore_loaded_cats != '' else [] + + opt.num_workers = max(opt.num_workers, 2 * len(opt.gpus)) + opt.pre_img = False + if 'tracking' in opt.task: + print('Running tracking') + opt.tracking = True +# opt.out_thresh = max(opt.track_thresh, opt.out_thresh) +# opt.pre_thresh = max(opt.track_thresh, opt.pre_thresh) +# opt.new_thresh = max(opt.track_thresh, opt.new_thresh) + opt.pre_img = not opt.no_pre_img + print('Using tracking threshold for out threshold!', opt.track_thresh) + if 'ddd' in opt.task: + opt.show_track_color = True + + opt.fix_res = not opt.keep_res + print('Fix size testing.' if opt.fix_res else 'Keep resolution testing.') + + if opt.head_conv == -1: # init default head_conv + opt.head_conv = 256 if 'dla' in opt.arch else 64 + + opt.pad = 127 if 'hourglass' in opt.arch else 31 + opt.num_stacks = 2 if opt.arch == 'hourglass' else 1 + + if opt.master_batch_size == -1: + opt.master_batch_size = opt.batch_size // len(opt.gpus) + rest_batch_size = (opt.batch_size - opt.master_batch_size) + opt.chunk_sizes = [opt.master_batch_size] + for i in range(len(opt.gpus) - 1): + slave_chunk_size = rest_batch_size // (len(opt.gpus) - 1) + if i < rest_batch_size % (len(opt.gpus) - 1): + slave_chunk_size += 1 + opt.chunk_sizes.append(slave_chunk_size) + print('training chunk_sizes:', opt.chunk_sizes) + + if opt.debug > 0: + opt.num_workers = 0 + opt.batch_size = 1 + opt.gpus = [opt.gpus[0]] + opt.master_batch_size = -1 + + # log dirs + opt.root_dir = os.path.join(os.path.dirname(__file__), '..', '..') + opt.data_dir = os.path.join(opt.root_dir, 'data') + opt.exp_dir = os.path.join(opt.root_dir, 'exp', opt.task) + opt.save_dir = os.path.join(opt.exp_dir, opt.exp_id) + opt.debug_dir = os.path.join(opt.save_dir, 'debug') + + if opt.resume and opt.load_model == '': + opt.load_model = os.path.join(opt.save_dir, 'model_last.pth') + return opt + + + def update_dataset_info_and_set_heads(self, opt, dataset): + opt.num_classes = dataset.num_categories \ + if opt.num_classes < 0 else opt.num_classes + # input_h(w): opt.input_h overrides opt.input_res overrides dataset default + input_h, input_w = dataset.default_resolution + input_h = opt.input_res if opt.input_res > 0 else input_h + input_w = opt.input_res if opt.input_res > 0 else input_w + opt.input_h = opt.input_h if opt.input_h > 0 else input_h + opt.input_w = opt.input_w if opt.input_w > 0 else input_w + opt.output_h = opt.input_h // opt.down_ratio + opt.output_w = 
opt.input_w // opt.down_ratio + opt.input_res = max(opt.input_h, opt.input_w) + opt.output_res = max(opt.output_h, opt.output_w) + + opt.heads = {'hm': opt.num_classes, 'reg': 2, 'wh': 2} + + if 'tracking' in opt.task: + opt.heads.update({'tracking': 2}) + + if 'ddd' in opt.task: + opt.heads.update({'dep': 1, 'rot': 8, 'dim': 3, 'amodel_offset': 2}) + + if 'multi_pose' in opt.task: + opt.heads.update({ + 'hps': dataset.num_joints * 2, 'hm_hp': dataset.num_joints, + 'hp_offset': 2}) + + if opt.ltrb: + opt.heads.update({'ltrb': 4}) + if opt.ltrb_amodal: + opt.heads.update({'ltrb_amodal': 4}) + if opt.nuscenes_att: + opt.heads.update({'nuscenes_att': 8}) + if opt.velocity: + opt.heads.update({'velocity': 3}) + + weight_dict = {'hm': opt.hm_weight, 'wh': opt.wh_weight, + 'reg': opt.off_weight, 'hps': opt.hp_weight, + 'hm_hp': opt.hm_hp_weight, 'hp_offset': opt.off_weight, + 'dep': opt.dep_weight, 'rot': opt.rot_weight, + 'dim': opt.dim_weight, + 'amodel_offset': opt.amodel_offset_weight, + 'ltrb': opt.ltrb_weight, + 'tracking': opt.tracking_weight, + 'ltrb_amodal': opt.ltrb_amodal_weight, + 'nuscenes_att': opt.nuscenes_att_weight, + 'velocity': opt.velocity_weight} + opt.weights = {head: weight_dict[head] for head in opt.heads} + for head in opt.weights: + if opt.weights[head] == 0: + del opt.heads[head] + opt.head_conv = {head: [opt.head_conv \ + for i in range(opt.num_head_conv if head != 'reg' else 1)] for head in opt.heads} + + print('input h w:', opt.input_h, opt.input_w) + print('heads', opt.heads) + print('weights', opt.weights) + print('head conv', opt.head_conv) + + return opt + + def init(self, args=''): + # only used in demo + default_dataset_info = { + 'ctdet': 'coco', 'multi_pose': 'coco_hp', 'ddd': 'nuscenes', + 'tracking,ctdet': 'coco', 'tracking,multi_pose': 'coco_hp', + 'tracking,ddd': 'nuscenes' + } + opt = self.parse() + from dataset.dataset_factory import dataset_factory + train_dataset = default_dataset_info[opt.task] \ + if opt.task in default_dataset_info else 'coco' + dataset = dataset_factory[train_dataset] + opt = self.update_dataset_info_and_set_heads(opt, dataset) + return opt diff --git a/tutorials/centertrack/tracker.py b/tutorials/centertrack/tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..22a746528ae84416423d7e1ec5b7d93429560b5d --- /dev/null +++ b/tutorials/centertrack/tracker.py @@ -0,0 +1,198 @@ +import numpy as np +from sklearn.utils.linear_assignment_ import linear_assignment +# from numba import jit +import copy + + +class Tracker(object): + def __init__(self, opt): + self.opt = opt + self.reset() + + def init_track(self, results): + for item in results: + if item['score'] > self.opt.new_thresh: + self.id_count += 1 + # active and age are never used in the paper + item['active'] = 1 + item['age'] = 1 + item['tracking_id'] = self.id_count + if not ('ct' in item): + bbox = item['bbox'] + item['ct'] = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] + self.tracks.append(item) + + def reset(self): + self.id_count = 0 + self.tracks = [] + + def step(self, results_with_low, public_det=None): + + results = [item for item in results_with_low if item['score'] >= self.opt.track_thresh] + + # first association + N = len(results) + M = len(self.tracks) + + dets = np.array( + [det['ct'] + det['tracking'] for det in results], np.float32) # N x 2 + track_size = np.array([((track['bbox'][2] - track['bbox'][0]) * \ + (track['bbox'][3] - track['bbox'][1])) \ + for track in self.tracks], np.float32) # M + track_cat = np.array([track['class'] 
for track in self.tracks], np.int32) # M + item_size = np.array([((item['bbox'][2] - item['bbox'][0]) * \ + (item['bbox'][3] - item['bbox'][1])) \ + for item in results], np.float32) # N + item_cat = np.array([item['class'] for item in results], np.int32) # N + tracks = np.array( + [pre_det['ct'] for pre_det in self.tracks], np.float32) # M x 2 + dist = (((tracks.reshape(1, -1, 2) - \ + dets.reshape(-1, 1, 2)) ** 2).sum(axis=2)) # N x M + + invalid = ((dist > track_size.reshape(1, M)) + \ + (dist > item_size.reshape(N, 1)) + \ + (item_cat.reshape(N, 1) != track_cat.reshape(1, M))) > 0 + dist = dist + invalid * 1e18 + + if self.opt.hungarian: + assert not self.opt.hungarian, 'we only verify centertrack with greedy_assignment' + item_score = np.array([item['score'] for item in results], np.float32) # N + dist[dist > 1e18] = 1e18 + matched_indices = linear_assignment(dist) + else: + matched_indices = greedy_assignment(copy.deepcopy(dist)) + + unmatched_dets = [d for d in range(dets.shape[0]) \ + if not (d in matched_indices[:, 0])] + unmatched_tracks = [d for d in range(tracks.shape[0]) \ + if not (d in matched_indices[:, 1])] + + if self.opt.hungarian: + assert not self.opt.hungarian, 'we only verify centertrack with greedy_assignment' + matches = [] + for m in matched_indices: + if dist[m[0], m[1]] > 1e16: + unmatched_dets.append(m[0]) + unmatched_tracks.append(m[1]) + else: + matches.append(m) + matches = np.array(matches).reshape(-1, 2) + else: + matches = matched_indices + + ret = [] + for m in matches: + track = results[m[0]] + track['tracking_id'] = self.tracks[m[1]]['tracking_id'] + track['age'] = 1 + track['active'] = self.tracks[m[1]]['active'] + 1 + ret.append(track) + + if self.opt.public_det and len(unmatched_dets) > 0: + assert not self.opt.public_det, 'we only verify centertrack with private detection' + # Public detection: only create tracks from provided detections + pub_dets = np.array([d['ct'] for d in public_det], np.float32) + dist3 = ((dets.reshape(-1, 1, 2) - pub_dets.reshape(1, -1, 2)) ** 2).sum( + axis=2) + matched_dets = [d for d in range(dets.shape[0]) \ + if not (d in unmatched_dets)] + dist3[matched_dets] = 1e18 + for j in range(len(pub_dets)): + i = dist3[:, j].argmin() + if dist3[i, j] < item_size[i]: + dist3[i, :] = 1e18 + track = results[i] + if track['score'] > self.opt.new_thresh: + self.id_count += 1 + track['tracking_id'] = self.id_count + track['age'] = 1 + track['active'] = 1 + ret.append(track) + else: + # Private detection: create tracks for all un-matched detections + for i in unmatched_dets: + track = results[i] + if track['score'] > self.opt.new_thresh: + self.id_count += 1 + track['tracking_id'] = self.id_count + track['age'] = 1 + track['active'] = 1 + ret.append(track) + + # second association + results_second = [item for item in results_with_low if item['score'] < self.opt.track_thresh] + + self_tracks_second = [self.tracks[i] for i in unmatched_tracks if self.tracks[i]['active'] > 0] + second2original = [i for i in unmatched_tracks if self.tracks[i]['active'] > 0] + + N = len(results_second) + M = len(self_tracks_second) + + if N > 0 and M > 0: + dets = np.array( + [det['ct'] + det['tracking'] for det in results_second], np.float32) # N x 2 + track_size = np.array([((track['bbox'][2] - track['bbox'][0]) * \ + (track['bbox'][3] - track['bbox'][1])) \ + for track in self_tracks_second], np.float32) # M + track_cat = np.array([track['class'] for track in self_tracks_second], np.int32) # M + item_size = np.array([((item['bbox'][2] - 
item['bbox'][0]) * \ + (item['bbox'][3] - item['bbox'][1])) \ + for item in results_second], np.float32) # N + item_cat = np.array([item['class'] for item in results_second], np.int32) # N + tracks_second = np.array( + [pre_det['ct'] for pre_det in self_tracks_second], np.float32) # M x 2 + dist = (((tracks_second.reshape(1, -1, 2) - \ + dets.reshape(-1, 1, 2)) ** 2).sum(axis=2)) # N x M + + invalid = ((dist > track_size.reshape(1, M)) + \ + (dist > item_size.reshape(N, 1)) + \ + (item_cat.reshape(N, 1) != track_cat.reshape(1, M))) > 0 + dist = dist + invalid * 1e18 + + matched_indices_second = greedy_assignment(copy.deepcopy(dist), 1e8) + + unmatched_tracks_second = [d for d in range(tracks_second.shape[0]) \ + if not (d in matched_indices_second[:, 1])] + matches_second = matched_indices_second + + for m in matches_second: + track = results_second[m[0]] + track['tracking_id'] = self_tracks_second[m[1]]['tracking_id'] + track['age'] = 1 + track['active'] = self_tracks_second[m[1]]['active'] + 1 + ret.append(track) + + unmatched_tracks = [second2original[i] for i in unmatched_tracks_second] + \ + [i for i in unmatched_tracks if self.tracks[i]['active'] == 0] + +#. for debug +# unmatched_tracks = [i for i in unmatched_tracks if self.tracks[i]['active'] > 0] + \ +# [i for i in unmatched_tracks if self.tracks[i]['active'] == 0] + + for i in unmatched_tracks: + track = self.tracks[i] + if track['age'] < self.opt.max_age: + track['age'] += 1 + track['active'] = 0 + bbox = track['bbox'] + ct = track['ct'] + v = [0, 0] + track['bbox'] = [ + bbox[0] + v[0], bbox[1] + v[1], + bbox[2] + v[0], bbox[3] + v[1]] + track['ct'] = [ct[0] + v[0], ct[1] + v[1]] + ret.append(track) + self.tracks = ret + return ret + + +def greedy_assignment(dist, thresh=1e16): + matched_indices = [] + if dist.shape[1] == 0: + return np.array(matched_indices, np.int32).reshape(-1, 2) + for i in range(dist.shape[0]): + j = dist[i].argmin() + if dist[i][j] < thresh: + dist[:, j] = 1e18 + matched_indices.append([i, j]) + return np.array(matched_indices, np.int32).reshape(-1, 2) diff --git a/tutorials/cstrack/README.md b/tutorials/cstrack/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fdb66c1955791274891905aa620ec10636c86d6f --- /dev/null +++ b/tutorials/cstrack/README.md @@ -0,0 +1,28 @@ +# CSTrack + +Step1. git clone https://github.com/JudasDie/SOTS.git + + +Step2. replace https://github.com/JudasDie/SOTS/blob/master/lib/tracker/cstrack.py + + +Step3. download cstrack model trained on MIX and MOT17_half (mix_mot17_half_cstrack.pt): [google](https://drive.google.com/file/d/1OG5PDj_CYmMiw3dN6pZ0FsgqY__CIDx1/view?usp=sharing), [baidu(code:0bsu)](https://pan.baidu.com/s/1Z2VnE-OhZIPmgX6-4r9Z1Q) + + +Step4. 
run BYTE tracker example: +``` +python3 test_cstrack.py --val_mot17 True --val_hf 2 --weights weights/mix_mot17_half_cstrack.pt --conf_thres 0.7 --data_cfg ../src/lib/cfg/mot17_hf.json --data_dir your/data/path +``` + + +## Notes +byte_tracker: only motion + +tracker: motion + reid + + + + + + + diff --git a/tutorials/cstrack/byte_tracker.py b/tutorials/cstrack/byte_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..ae2af402f2780e77ef0fa7ef24cbee8bf62c4a94 --- /dev/null +++ b/tutorials/cstrack/byte_tracker.py @@ -0,0 +1,500 @@ +from collections import deque +import os +import cv2 +import numpy as np +import torch +import torch.nn.functional as F +from torchsummary import summary + +from core.mot.general import non_max_suppression_and_inds, non_max_suppression_jde, non_max_suppression, scale_coords +from core.mot.torch_utils import intersect_dicts +from models.mot.cstrack import Model + +from mot_online import matching +from mot_online.kalman_filter import KalmanFilter +from mot_online.log import logger +from mot_online.utils import * + +from mot_online.basetrack import BaseTrack, TrackState + + +class STrack(BaseTrack): + shared_kalman = KalmanFilter() + def __init__(self, tlwh, score): + + # wait activate + self._tlwh = np.asarray(tlwh, dtype=np.float) + self.kalman_filter = None + self.mean, self.covariance = None, None + self.is_activated = False + + self.score = score + self.tracklet_len = 0 + + def predict(self): + mean_state = self.mean.copy() + if self.state != TrackState.Tracked: + mean_state[7] = 0 + self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) + + @staticmethod + def multi_predict(stracks): + if len(stracks) > 0: + multi_mean = np.asarray([st.mean.copy() for st in stracks]) + multi_covariance = np.asarray([st.covariance for st in stracks]) + for i, st in enumerate(stracks): + if st.state != TrackState.Tracked: + multi_mean[i][7] = 0 + multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance) + for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): + stracks[i].mean = mean + stracks[i].covariance = cov + + def activate(self, kalman_filter, frame_id): + """Start a new tracklet""" + self.kalman_filter = kalman_filter + self.track_id = self.next_id() + self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) + + self.tracklet_len = 0 + self.state = TrackState.Tracked + #self.is_activated = True + self.frame_id = frame_id + self.start_frame = frame_id + + def re_activate(self, new_track, frame_id, new_id=False): + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) + ) + + self.tracklet_len = 0 + self.state = TrackState.Tracked + self.is_activated = True + self.frame_id = frame_id + if new_id: + self.track_id = self.next_id() + + def update(self, new_track, frame_id): + """ + Update a matched track + :type new_track: STrack + :type frame_id: int + :type update_feature: bool + :return: + """ + self.frame_id = frame_id + self.tracklet_len += 1 + + new_tlwh = new_track.tlwh + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) + self.state = TrackState.Tracked + self.is_activated = True + + self.score = new_track.score + + @property + # @jit(nopython=True) + def tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. 
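+
+        The Kalman state kept in ``self.mean`` stores ``(center x, center y,
+        aspect ratio a, height h, ...)``, so the width is recovered as ``a * h``
+        and the center is shifted back to the top-left corner before returning.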
+ """ + if self.mean is None: + return self._tlwh.copy() + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + @property + # @jit(nopython=True) + def tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_xyah(tlwh): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. + """ + ret = np.asarray(tlwh).copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + def to_xyah(self): + return self.tlwh_to_xyah(self.tlwh) + + @staticmethod + # @jit(nopython=True) + def tlbr_to_tlwh(tlbr): + ret = np.asarray(tlbr).copy() + ret[2:] -= ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_tlbr(tlwh): + ret = np.asarray(tlwh).copy() + ret[2:] += ret[:2] + return ret + + def __repr__(self): + return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) + + +class BYTETracker(object): + def __init__(self, opt, frame_rate=30): + self.opt = opt + if int(opt.gpus[0]) >= 0: + opt.device = torch.device('cuda') + else: + opt.device = torch.device('cpu') + print('Creating model...') + + ckpt = torch.load(opt.weights, map_location=opt.device) # load checkpoint + self.model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=1).to(opt.device) # create + exclude = ['anchor'] if opt.cfg else [] # exclude keys + if type(ckpt['model']).__name__ == "OrderedDict": + state_dict = ckpt['model'] + else: + state_dict = ckpt['model'].float().state_dict() # to FP32 + state_dict = intersect_dicts(state_dict, self.model.state_dict(), exclude=exclude) # intersect + self.model.load_state_dict(state_dict, strict=False) # load + self.model.cuda().eval() + total_params = sum(p.numel() for p in self.model.parameters()) + print(f'{total_params:,} total parameters.') + + + self.tracked_stracks = [] # type: list[STrack] + self.lost_stracks = [] # type: list[STrack] + self.removed_stracks = [] # type: list[STrack] + + self.frame_id = 0 + self.det_thresh = opt.conf_thres + self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer) + self.max_time_lost = self.buffer_size + self.mean = np.array(opt.mean, dtype=np.float32).reshape(1, 1, 3) + self.std = np.array(opt.std, dtype=np.float32).reshape(1, 1, 3) + + self.kalman_filter = KalmanFilter() + self.low_thres = 0.2 + self.high_thres = self.opt.conf_thres + 0.1 + + def update(self, im_blob, img0,seq_num, save_dir): + self.frame_id += 1 + activated_starcks = [] + refind_stracks = [] + lost_stracks = [] + removed_stracks = [] + dets = [] + + ''' Step 1: Network forward, get detections & embeddings''' + with torch.no_grad(): + output = self.model(im_blob, augment=False) + pred, train_out = output[1] + + pred = pred[pred[:, :, 4] > self.low_thres] + detections = [] + if len(pred) > 0: + dets,x_inds,y_inds = non_max_suppression_and_inds(pred[:,:6].unsqueeze(0), 0.1, self.opt.nms_thres,method='cluster_diou') + if len(dets) != 0: + scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round() + + remain_inds = dets[:, 4] > self.opt.conf_thres + inds_low = dets[:, 4] > self.low_thres + inds_high = dets[:, 4] < self.opt.conf_thres + inds_second = np.logical_and(inds_low, inds_high) + dets_second = dets[inds_second] + dets = dets[remain_inds] + + detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4]) for + tlbrs in dets[:, :5]] + + else: + detections = 
[] + dets_second = [] + id_feature_second = [] + + ''' Add newly detected tracklets to tracked_stracks''' + unconfirmed = [] + tracked_stracks = [] # type: list[STrack] + for track in self.tracked_stracks: + if not track.is_activated: + unconfirmed.append(track) + else: + tracked_stracks.append(track) + + ''' Step 2: First association, with embedding''' + strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) + # Predict the current location with KF + STrack.multi_predict(strack_pool) + dists = matching.iou_distance(strack_pool, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.8) + + for itracked, idet in matches: + track = strack_pool[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(detections[idet], self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + # vis + track_features, det_features, cost_matrix, cost_matrix_det, cost_matrix_track = [],[],[],[],[] + if self.opt.vis_state == 1 and self.frame_id % 20 == 0: + if len(dets) != 0: + for i in range(0, dets.shape[0]): + bbox = dets[i][0:4] + cv2.rectangle(img0, (int(bbox[0]), int(bbox[1])),(int(bbox[2]), int(bbox[3])),(0, 255, 0), 2) + track_features, det_features, cost_matrix, cost_matrix_det, cost_matrix_track = matching.vis_id_feature_A_distance(strack_pool, detections) + vis_feature(self.frame_id,seq_num,img0,track_features, + det_features, cost_matrix, cost_matrix_det, cost_matrix_track, max_num=5, out_path=save_dir) + + ''' Step 3: Second association, with IOU''' + + # association the untrack to the low score detections + if len(dets_second) > 0: + detections_second = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4]) for + tlbrs in dets_second[:, :5]] + else: + detections_second = [] + r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] + dists = matching.iou_distance(r_tracked_stracks, detections_second) + matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.4) + for itracked, idet in matches: + track = r_tracked_stracks[itracked] + det = detections_second[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + for it in u_track: + track = r_tracked_stracks[it] + if not track.state == TrackState.Lost: + track.mark_lost() + lost_stracks.append(track) + + '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' + detections = [detections[i] for i in u_detection] + dists = matching.iou_distance(unconfirmed, detections) + matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) + for itracked, idet in matches: + unconfirmed[itracked].update(detections[idet], self.frame_id) + activated_starcks.append(unconfirmed[itracked]) + for it in u_unconfirmed: + track = unconfirmed[it] + track.mark_removed() + removed_stracks.append(track) + + """ Step 4: Init new stracks""" + for inew in u_detection: + track = detections[inew] + if track.score < self.high_thres: + continue + track.activate(self.kalman_filter, self.frame_id) + activated_starcks.append(track) + """ Step 5: Update state""" + for track in self.lost_stracks: + if self.frame_id - track.end_frame > self.max_time_lost: + track.mark_removed() + removed_stracks.append(track) + + # print('Ramained match {} s'.format(t4-t3)) + + 
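+        # Track-pool bookkeeping (the statements below): keep only tracks still
+        # in the Tracked state, merge in the tracks activated or re-found this
+        # frame, move newly lost tracks into the lost pool, drop lost tracks that
+        # were re-acquired or removed, and let remove_duplicate_stracks discard
+        # the shorter-lived track whenever a tracked and a lost track overlap
+        # almost completely (IoU distance < 0.15).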
self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] + self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) + self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) + self.lost_stracks.extend(lost_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) + self.removed_stracks.extend(removed_stracks) + self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) + # get scores of lost tracks + output_stracks = [track for track in self.tracked_stracks if track.is_activated] + + logger.debug('===========Frame {}=========='.format(self.frame_id)) + logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks])) + logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks])) + logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks])) + logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks])) + + return output_stracks + + +def joint_stracks(tlista, tlistb): + exists = {} + res = [] + for t in tlista: + exists[t.track_id] = 1 + res.append(t) + for t in tlistb: + tid = t.track_id + if not exists.get(tid, 0): + exists[tid] = 1 + res.append(t) + return res + + +def sub_stracks(tlista, tlistb): + stracks = {} + for t in tlista: + stracks[t.track_id] = t + for t in tlistb: + tid = t.track_id + if stracks.get(tid, 0): + del stracks[tid] + return list(stracks.values()) + + +def remove_duplicate_stracks(stracksa, stracksb): + pdist = matching.iou_distance(stracksa, stracksb) + pairs = np.where(pdist < 0.15) + dupa, dupb = list(), list() + for p, q in zip(*pairs): + timep = stracksa[p].frame_id - stracksa[p].start_frame + timeq = stracksb[q].frame_id - stracksb[q].start_frame + if timep > timeq: + dupb.append(q) + else: + dupa.append(p) + resa = [t for i, t in enumerate(stracksa) if not i in dupa] + resb = [t for i, t in enumerate(stracksb) if not i in dupb] + return resa, resb + +def vis_feature(frame_id,seq_num,img,track_features, det_features, cost_matrix, cost_matrix_det, cost_matrix_track,max_num=5, out_path='/home/XX/'): + num_zero = ["0000","000","00","0"] + img = cv2.resize(img, (778, 435)) + + if len(det_features) != 0: + max_f = det_features.max() + min_f = det_features.min() + det_features = np.round((det_features - min_f) / (max_f - min_f) * 255) + det_features = det_features.astype(np.uint8) + d_F_M = [] + cutpff_line = [40]*512 + for d_f in det_features: + for row in range(45): + d_F_M += [[40]*3+d_f.tolist()+[40]*3] + for row in range(3): + d_F_M += [[40]*3+cutpff_line+[40]*3] + d_F_M = np.array(d_F_M) + d_F_M = d_F_M.astype(np.uint8) + det_features_img = cv2.applyColorMap(d_F_M, cv2.COLORMAP_JET) + feature_img2 = cv2.resize(det_features_img, (435, 435)) + #cv2.putText(feature_img2, "det_features", (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + else: + feature_img2 = np.zeros((435, 435)) + feature_img2 = feature_img2.astype(np.uint8) + feature_img2 = cv2.applyColorMap(feature_img2, cv2.COLORMAP_JET) + #cv2.putText(feature_img2, "det_features", (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + feature_img = np.concatenate((img, feature_img2), axis=1) + + if len(cost_matrix_det) != 0 and len(cost_matrix_det[0]) != 0: + max_f = cost_matrix_det.max() + min_f = cost_matrix_det.min() + cost_matrix_det = np.round((cost_matrix_det - min_f) / (max_f - min_f) * 255) + d_F_M 
= [] + cutpff_line = [40]*len(cost_matrix_det)*10 + for c_m in cost_matrix_det: + add = [] + for row in range(len(c_m)): + add += [255-c_m[row]]*10 + for row in range(10): + d_F_M += [[40]+add+[40]] + d_F_M = np.array(d_F_M) + d_F_M = d_F_M.astype(np.uint8) + cost_matrix_det_img = cv2.applyColorMap(d_F_M, cv2.COLORMAP_JET) + feature_img2 = cv2.resize(cost_matrix_det_img, (435, 435)) + #cv2.putText(feature_img2, "cost_matrix_det", (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + else: + feature_img2 = np.zeros((435, 435)) + feature_img2 = feature_img2.astype(np.uint8) + feature_img2 = cv2.applyColorMap(feature_img2, cv2.COLORMAP_JET) + #cv2.putText(feature_img2, "cost_matrix_det", (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + feature_img = np.concatenate((feature_img, feature_img2), axis=1) + + if len(track_features) != 0: + max_f = track_features.max() + min_f = track_features.min() + track_features = np.round((track_features - min_f) / (max_f - min_f) * 255) + track_features = track_features.astype(np.uint8) + d_F_M = [] + cutpff_line = [40]*512 + for d_f in track_features: + for row in range(45): + d_F_M += [[40]*3+d_f.tolist()+[40]*3] + for row in range(3): + d_F_M += [[40]*3+cutpff_line+[40]*3] + d_F_M = np.array(d_F_M) + d_F_M = d_F_M.astype(np.uint8) + track_features_img = cv2.applyColorMap(d_F_M, cv2.COLORMAP_JET) + feature_img2 = cv2.resize(track_features_img, (435, 435)) + #cv2.putText(feature_img2, "track_features", (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + else: + feature_img2 = np.zeros((435, 435)) + feature_img2 = feature_img2.astype(np.uint8) + feature_img2 = cv2.applyColorMap(feature_img2, cv2.COLORMAP_JET) + #cv2.putText(feature_img2, "track_features", (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + feature_img = np.concatenate((feature_img, feature_img2), axis=1) + + if len(cost_matrix_track) != 0 and len(cost_matrix_track[0]) != 0: + max_f = cost_matrix_track.max() + min_f = cost_matrix_track.min() + cost_matrix_track = np.round((cost_matrix_track - min_f) / (max_f - min_f) * 255) + d_F_M = [] + cutpff_line = [40]*len(cost_matrix_track)*10 + for c_m in cost_matrix_track: + add = [] + for row in range(len(c_m)): + add += [255-c_m[row]]*10 + for row in range(10): + d_F_M += [[40]+add+[40]] + d_F_M = np.array(d_F_M) + d_F_M = d_F_M.astype(np.uint8) + cost_matrix_track_img = cv2.applyColorMap(d_F_M, cv2.COLORMAP_JET) + feature_img2 = cv2.resize(cost_matrix_track_img, (435, 435)) + #cv2.putText(feature_img2, "cost_matrix_track", (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + else: + feature_img2 = np.zeros((435, 435)) + feature_img2 = feature_img2.astype(np.uint8) + feature_img2 = cv2.applyColorMap(feature_img2, cv2.COLORMAP_JET) + #cv2.putText(feature_img2, "cost_matrix_track", (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + feature_img = np.concatenate((feature_img, feature_img2), axis=1) + + if len(cost_matrix) != 0 and len(cost_matrix[0]) != 0: + max_f = cost_matrix.max() + min_f = cost_matrix.min() + cost_matrix = np.round((cost_matrix - min_f) / (max_f - min_f) * 255) + d_F_M = [] + cutpff_line = [40]*len(cost_matrix[0])*10 + for c_m in cost_matrix: + add = [] + for row in range(len(c_m)): + add += [255-c_m[row]]*10 + for row in range(10): + d_F_M += [[40]+add+[40]] + d_F_M = np.array(d_F_M) + d_F_M = d_F_M.astype(np.uint8) + cost_matrix_img = cv2.applyColorMap(d_F_M, cv2.COLORMAP_JET) + feature_img2 = cv2.resize(cost_matrix_img, (435, 435)) + #cv2.putText(feature_img2, "cost_matrix", (5, 20), 
cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + else: + feature_img2 = np.zeros((435, 435)) + feature_img2 = feature_img2.astype(np.uint8) + feature_img2 = cv2.applyColorMap(feature_img2, cv2.COLORMAP_JET) + #cv2.putText(feature_img2, "cost_matrix", (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + feature_img = np.concatenate((feature_img, feature_img2), axis=1) + + dst_path = out_path + "/" + seq_num + "_" + num_zero[len(str(frame_id))-1] + str(frame_id) + '.png' + cv2.imwrite(dst_path, feature_img) \ No newline at end of file diff --git a/tutorials/cstrack/tracker.py b/tutorials/cstrack/tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..67b7b49600993b016cc877abf0ceabd1a7942520 --- /dev/null +++ b/tutorials/cstrack/tracker.py @@ -0,0 +1,542 @@ +from collections import deque +import os +import cv2 +import numpy as np +import torch +import torch.nn.functional as F +from torchsummary import summary + +from core.mot.general import non_max_suppression_and_inds, non_max_suppression_jde, non_max_suppression, scale_coords +from core.mot.torch_utils import intersect_dicts +from models.mot.cstrack import Model + +from mot_online import matching +from mot_online.kalman_filter import KalmanFilter +from mot_online.log import logger +from mot_online.utils import * + +from mot_online.basetrack import BaseTrack, TrackState + + +class STrack(BaseTrack): + shared_kalman = KalmanFilter() + def __init__(self, tlwh, score, temp_feat, buffer_size=30): + + # wait activate + self._tlwh = np.asarray(tlwh, dtype=np.float) + self.kalman_filter = None + self.mean, self.covariance = None, None + self.is_activated = False + + self.score = score + self.tracklet_len = 0 + + self.smooth_feat = None + self.update_features(temp_feat) + self.features = deque([], maxlen=buffer_size) + self.alpha = 0.9 + + def update_features(self, feat): + feat /= np.linalg.norm(feat) + self.curr_feat = feat + if self.smooth_feat is None: + self.smooth_feat = feat + else: + self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat + self.features.append(feat) + self.smooth_feat /= np.linalg.norm(self.smooth_feat) + + def predict(self): + mean_state = self.mean.copy() + if self.state != TrackState.Tracked: + mean_state[7] = 0 + self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) + + @staticmethod + def multi_predict(stracks): + if len(stracks) > 0: + multi_mean = np.asarray([st.mean.copy() for st in stracks]) + multi_covariance = np.asarray([st.covariance for st in stracks]) + for i, st in enumerate(stracks): + if st.state != TrackState.Tracked: + multi_mean[i][7] = 0 + multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance) + for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): + stracks[i].mean = mean + stracks[i].covariance = cov + + def activate(self, kalman_filter, frame_id): + """Start a new tracklet""" + self.kalman_filter = kalman_filter + self.track_id = self.next_id() + self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) + + self.tracklet_len = 0 + self.state = TrackState.Tracked + #self.is_activated = True + self.frame_id = frame_id + self.start_frame = frame_id + + def re_activate(self, new_track, frame_id, new_id=False): + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) + ) + + self.update_features(new_track.curr_feat) + self.tracklet_len = 0 + self.state = TrackState.Tracked + 
self.is_activated = True + self.frame_id = frame_id + if new_id: + self.track_id = self.next_id() + + def update(self, new_track, frame_id, update_feature=True): + """ + Update a matched track + :type new_track: STrack + :type frame_id: int + :type update_feature: bool + :return: + """ + self.frame_id = frame_id + self.tracklet_len += 1 + + new_tlwh = new_track.tlwh + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) + self.state = TrackState.Tracked + self.is_activated = True + + self.score = new_track.score + if update_feature: + self.update_features(new_track.curr_feat) + + @property + # @jit(nopython=True) + def tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + """ + if self.mean is None: + return self._tlwh.copy() + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + @property + # @jit(nopython=True) + def tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_xyah(tlwh): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. + """ + ret = np.asarray(tlwh).copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + def to_xyah(self): + return self.tlwh_to_xyah(self.tlwh) + + @staticmethod + # @jit(nopython=True) + def tlbr_to_tlwh(tlbr): + ret = np.asarray(tlbr).copy() + ret[2:] -= ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_tlbr(tlwh): + ret = np.asarray(tlwh).copy() + ret[2:] += ret[:2] + return ret + + def __repr__(self): + return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) + + +class JDETracker(object): + def __init__(self, opt, frame_rate=30): + self.opt = opt + if int(opt.gpus[0]) >= 0: + opt.device = torch.device('cuda') + else: + opt.device = torch.device('cpu') + print('Creating model...') + + ckpt = torch.load(opt.weights, map_location=opt.device) # load checkpoint + self.model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=1).to(opt.device) # create + exclude = ['anchor'] if opt.cfg else [] # exclude keys + if type(ckpt['model']).__name__ == "OrderedDict": + state_dict = ckpt['model'] + else: + state_dict = ckpt['model'].float().state_dict() # to FP32 + state_dict = intersect_dicts(state_dict, self.model.state_dict(), exclude=exclude) # intersect + self.model.load_state_dict(state_dict, strict=False) # load + self.model.cuda().eval() + total_params = sum(p.numel() for p in self.model.parameters()) + print(f'{total_params:,} total parameters.') + + + self.tracked_stracks = [] # type: list[STrack] + self.lost_stracks = [] # type: list[STrack] + self.removed_stracks = [] # type: list[STrack] + + self.frame_id = 0 + self.det_thresh = opt.conf_thres + self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer) + self.max_time_lost = self.buffer_size + self.mean = np.array(opt.mean, dtype=np.float32).reshape(1, 1, 3) + self.std = np.array(opt.std, dtype=np.float32).reshape(1, 1, 3) + + self.kalman_filter = KalmanFilter() + self.low_thres = 0.2 + self.high_thres = self.opt.conf_thres + 0.1 + + def update(self, im_blob, img0,seq_num, save_dir): + self.frame_id += 1 + activated_starcks = [] + refind_stracks = [] + lost_stracks = [] + removed_stracks = [] + dets = [] + + ''' Step 1: Network forward, get 
detections & embeddings''' + with torch.no_grad(): + output = self.model(im_blob, augment=False) + pred, train_out = output[1] + + pred = pred[pred[:, :, 4] > self.low_thres] + detections = [] + if len(pred) > 0: + dets,x_inds,y_inds = non_max_suppression_and_inds(pred[:,:6].unsqueeze(0), 0.1, self.opt.nms_thres,method='cluster_diou') + if len(dets) != 0: + scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round() + id_feature = output[0][0, y_inds, x_inds, :].cpu().numpy() + + remain_inds = dets[:, 4] > self.opt.conf_thres + inds_low = dets[:, 4] > self.low_thres + inds_high = dets[:, 4] < self.opt.conf_thres + inds_second = np.logical_and(inds_low, inds_high) + dets_second = dets[inds_second] + if id_feature.shape[0] == 1: + id_feature_second = id_feature + else: + id_feature_second = id_feature[inds_second] + dets = dets[remain_inds] + id_feature = id_feature[remain_inds] + + detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for + (tlbrs, f) in zip(dets[:, :5], id_feature)] + + else: + detections = [] + dets_second = [] + id_feature_second = [] + + ''' Add newly detected tracklets to tracked_stracks''' + unconfirmed = [] + tracked_stracks = [] # type: list[STrack] + for track in self.tracked_stracks: + if not track.is_activated: + unconfirmed.append(track) + else: + tracked_stracks.append(track) + + ''' Step 2: First association, with embedding''' + strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) + # Predict the current location with KF + #for strack in strack_pool: + #strack.predict() + STrack.multi_predict(strack_pool) + dists = matching.embedding_distance(strack_pool, detections) + dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections) + #dists = matching.iou_distance(strack_pool, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.4) + + for itracked, idet in matches: + track = strack_pool[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(detections[idet], self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + # vis + track_features, det_features, cost_matrix, cost_matrix_det, cost_matrix_track = [],[],[],[],[] + if self.opt.vis_state == 1 and self.frame_id % 20 == 0: + if len(dets) != 0: + for i in range(0, dets.shape[0]): + bbox = dets[i][0:4] + cv2.rectangle(img0, (int(bbox[0]), int(bbox[1])),(int(bbox[2]), int(bbox[3])),(0, 255, 0), 2) + track_features, det_features, cost_matrix, cost_matrix_det, cost_matrix_track = matching.vis_id_feature_A_distance(strack_pool, detections) + vis_feature(self.frame_id,seq_num,img0,track_features, + det_features, cost_matrix, cost_matrix_det, cost_matrix_track, max_num=5, out_path=save_dir) + + ''' Step 3: Second association, with IOU''' + detections = [detections[i] for i in u_detection] + r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] + dists = matching.iou_distance(r_tracked_stracks, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5) + + for itracked, idet in matches: + track = r_tracked_stracks[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + # association the untrack to the low score detections + if len(dets_second) > 0: + 
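+            # BYTE-style recovery: detections with confidence between low_thres
+            # and conf_thres were set aside as dets_second. They are wrapped as
+            # STracks here and matched below by IoU (threshold 0.4) against
+            # tracks still unmatched after the embedding and first IoU rounds,
+            # so occluded low-score targets keep their identities.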
detections_second = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for + (tlbrs, f) in zip(dets_second[:, :5], id_feature_second)] + else: + detections_second = [] + second_tracked_stracks = [r_tracked_stracks[i] for i in u_track if r_tracked_stracks[i].state == TrackState.Tracked] + dists = matching.iou_distance(second_tracked_stracks, detections_second) + matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.4) + for itracked, idet in matches: + track = second_tracked_stracks[itracked] + det = detections_second[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + for it in u_track: + track = second_tracked_stracks[it] + if not track.state == TrackState.Lost: + track.mark_lost() + lost_stracks.append(track) + + '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' + detections = [detections[i] for i in u_detection] + dists = matching.iou_distance(unconfirmed, detections) + matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) + for itracked, idet in matches: + unconfirmed[itracked].update(detections[idet], self.frame_id) + activated_starcks.append(unconfirmed[itracked]) + for it in u_unconfirmed: + track = unconfirmed[it] + track.mark_removed() + removed_stracks.append(track) + + """ Step 4: Init new stracks""" + for inew in u_detection: + track = detections[inew] + if track.score < self.high_thres: + continue + track.activate(self.kalman_filter, self.frame_id) + activated_starcks.append(track) + """ Step 5: Update state""" + for track in self.lost_stracks: + if self.frame_id - track.end_frame > self.max_time_lost: + track.mark_removed() + removed_stracks.append(track) + + # print('Ramained match {} s'.format(t4-t3)) + + self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] + self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) + self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) + self.lost_stracks.extend(lost_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) + self.removed_stracks.extend(removed_stracks) + self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) + # get scores of lost tracks + output_stracks = [track for track in self.tracked_stracks if track.is_activated] + + logger.debug('===========Frame {}=========='.format(self.frame_id)) + logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks])) + logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks])) + logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks])) + logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks])) + + return output_stracks + + +def joint_stracks(tlista, tlistb): + exists = {} + res = [] + for t in tlista: + exists[t.track_id] = 1 + res.append(t) + for t in tlistb: + tid = t.track_id + if not exists.get(tid, 0): + exists[tid] = 1 + res.append(t) + return res + + +def sub_stracks(tlista, tlistb): + stracks = {} + for t in tlista: + stracks[t.track_id] = t + for t in tlistb: + tid = t.track_id + if stracks.get(tid, 0): + del stracks[tid] + return list(stracks.values()) + + +def remove_duplicate_stracks(stracksa, stracksb): + 
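+    # Tracks from the two pools whose pairwise IoU distance is below 0.15 are
+    # treated as duplicates of the same target; the copy that has been tracked
+    # longer (frame_id - start_frame) is kept and the other is dropped.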
pdist = matching.iou_distance(stracksa, stracksb) + pairs = np.where(pdist < 0.15) + dupa, dupb = list(), list() + for p, q in zip(*pairs): + timep = stracksa[p].frame_id - stracksa[p].start_frame + timeq = stracksb[q].frame_id - stracksb[q].start_frame + if timep > timeq: + dupb.append(q) + else: + dupa.append(p) + resa = [t for i, t in enumerate(stracksa) if not i in dupa] + resb = [t for i, t in enumerate(stracksb) if not i in dupb] + return resa, resb + +def vis_feature(frame_id,seq_num,img,track_features, det_features, cost_matrix, cost_matrix_det, cost_matrix_track,max_num=5, out_path='/home/XX/'): + num_zero = ["0000","000","00","0"] + img = cv2.resize(img, (778, 435)) + + if len(det_features) != 0: + max_f = det_features.max() + min_f = det_features.min() + det_features = np.round((det_features - min_f) / (max_f - min_f) * 255) + det_features = det_features.astype(np.uint8) + d_F_M = [] + cutpff_line = [40]*512 + for d_f in det_features: + for row in range(45): + d_F_M += [[40]*3+d_f.tolist()+[40]*3] + for row in range(3): + d_F_M += [[40]*3+cutpff_line+[40]*3] + d_F_M = np.array(d_F_M) + d_F_M = d_F_M.astype(np.uint8) + det_features_img = cv2.applyColorMap(d_F_M, cv2.COLORMAP_JET) + feature_img2 = cv2.resize(det_features_img, (435, 435)) + #cv2.putText(feature_img2, "det_features", (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + else: + feature_img2 = np.zeros((435, 435)) + feature_img2 = feature_img2.astype(np.uint8) + feature_img2 = cv2.applyColorMap(feature_img2, cv2.COLORMAP_JET) + #cv2.putText(feature_img2, "det_features", (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + feature_img = np.concatenate((img, feature_img2), axis=1) + + if len(cost_matrix_det) != 0 and len(cost_matrix_det[0]) != 0: + max_f = cost_matrix_det.max() + min_f = cost_matrix_det.min() + cost_matrix_det = np.round((cost_matrix_det - min_f) / (max_f - min_f) * 255) + d_F_M = [] + cutpff_line = [40]*len(cost_matrix_det)*10 + for c_m in cost_matrix_det: + add = [] + for row in range(len(c_m)): + add += [255-c_m[row]]*10 + for row in range(10): + d_F_M += [[40]+add+[40]] + d_F_M = np.array(d_F_M) + d_F_M = d_F_M.astype(np.uint8) + cost_matrix_det_img = cv2.applyColorMap(d_F_M, cv2.COLORMAP_JET) + feature_img2 = cv2.resize(cost_matrix_det_img, (435, 435)) + #cv2.putText(feature_img2, "cost_matrix_det", (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + else: + feature_img2 = np.zeros((435, 435)) + feature_img2 = feature_img2.astype(np.uint8) + feature_img2 = cv2.applyColorMap(feature_img2, cv2.COLORMAP_JET) + #cv2.putText(feature_img2, "cost_matrix_det", (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + feature_img = np.concatenate((feature_img, feature_img2), axis=1) + + if len(track_features) != 0: + max_f = track_features.max() + min_f = track_features.min() + track_features = np.round((track_features - min_f) / (max_f - min_f) * 255) + track_features = track_features.astype(np.uint8) + d_F_M = [] + cutpff_line = [40]*512 + for d_f in track_features: + for row in range(45): + d_F_M += [[40]*3+d_f.tolist()+[40]*3] + for row in range(3): + d_F_M += [[40]*3+cutpff_line+[40]*3] + d_F_M = np.array(d_F_M) + d_F_M = d_F_M.astype(np.uint8) + track_features_img = cv2.applyColorMap(d_F_M, cv2.COLORMAP_JET) + feature_img2 = cv2.resize(track_features_img, (435, 435)) + #cv2.putText(feature_img2, "track_features", (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + else: + feature_img2 = np.zeros((435, 435)) + feature_img2 = feature_img2.astype(np.uint8) + feature_img2 = 
cv2.applyColorMap(feature_img2, cv2.COLORMAP_JET) + #cv2.putText(feature_img2, "track_features", (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + feature_img = np.concatenate((feature_img, feature_img2), axis=1) + + if len(cost_matrix_track) != 0 and len(cost_matrix_track[0]) != 0: + max_f = cost_matrix_track.max() + min_f = cost_matrix_track.min() + cost_matrix_track = np.round((cost_matrix_track - min_f) / (max_f - min_f) * 255) + d_F_M = [] + cutpff_line = [40]*len(cost_matrix_track)*10 + for c_m in cost_matrix_track: + add = [] + for row in range(len(c_m)): + add += [255-c_m[row]]*10 + for row in range(10): + d_F_M += [[40]+add+[40]] + d_F_M = np.array(d_F_M) + d_F_M = d_F_M.astype(np.uint8) + cost_matrix_track_img = cv2.applyColorMap(d_F_M, cv2.COLORMAP_JET) + feature_img2 = cv2.resize(cost_matrix_track_img, (435, 435)) + #cv2.putText(feature_img2, "cost_matrix_track", (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + else: + feature_img2 = np.zeros((435, 435)) + feature_img2 = feature_img2.astype(np.uint8) + feature_img2 = cv2.applyColorMap(feature_img2, cv2.COLORMAP_JET) + #cv2.putText(feature_img2, "cost_matrix_track", (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + feature_img = np.concatenate((feature_img, feature_img2), axis=1) + + if len(cost_matrix) != 0 and len(cost_matrix[0]) != 0: + max_f = cost_matrix.max() + min_f = cost_matrix.min() + cost_matrix = np.round((cost_matrix - min_f) / (max_f - min_f) * 255) + d_F_M = [] + cutpff_line = [40]*len(cost_matrix[0])*10 + for c_m in cost_matrix: + add = [] + for row in range(len(c_m)): + add += [255-c_m[row]]*10 + for row in range(10): + d_F_M += [[40]+add+[40]] + d_F_M = np.array(d_F_M) + d_F_M = d_F_M.astype(np.uint8) + cost_matrix_img = cv2.applyColorMap(d_F_M, cv2.COLORMAP_JET) + feature_img2 = cv2.resize(cost_matrix_img, (435, 435)) + #cv2.putText(feature_img2, "cost_matrix", (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + else: + feature_img2 = np.zeros((435, 435)) + feature_img2 = feature_img2.astype(np.uint8) + feature_img2 = cv2.applyColorMap(feature_img2, cv2.COLORMAP_JET) + #cv2.putText(feature_img2, "cost_matrix", (5, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2) + feature_img = np.concatenate((feature_img, feature_img2), axis=1) + + dst_path = out_path + "/" + seq_num + "_" + num_zero[len(str(frame_id))-1] + str(frame_id) + '.png' + cv2.imwrite(dst_path, feature_img) \ No newline at end of file diff --git a/tutorials/ctracker/README.md b/tutorials/ctracker/README.md new file mode 100644 index 0000000000000000000000000000000000000000..00d9e3d18abbf7137382e64352405d082def7dc3 --- /dev/null +++ b/tutorials/ctracker/README.md @@ -0,0 +1,65 @@ +# CTracker + +#### Step1 +git clone https://github.com/pjl1995/CTracker.git and preapare dataset + + +#### Step2 + +add generate_half_csv.py to https://github.com/pjl1995/CTracker + +run generate_half_csv.py and put train_half_annots.csv in MOT17 + +run +``` +python3 train.py --root_path MOT17 --csv_train train_half_annots.csv --model_dir ctracker/ --depth 50 --epochs 50 +``` +You can also download the CTracker model trained by us: [google](https://drive.google.com/file/d/1TwBDomJx8pxD-e96mGIiTduLenUvmf1t/view?usp=sharing), [baidu(code:6p3w)](https://pan.baidu.com/s/1MaCvnHynX2Wzg81hWkqzeg) + +#### Step3 + +replace https://github.com/pjl1995/CTracker/blob/master/test.py + +run +``` +python3 test.py --dataset_path MOT17 --model_dir ctracker --model_path ctracker/mot17_half_ctracker.pt +``` + +#### Step4 + +add eval_motchallenge.py to 
https://github.com/pjl1995/CTracker + +prepare gt_half_val.txt as CenterTrack [DATA.md](https://github.com/xingyizhou/CenterTrack/blob/master/readme/DATA.md) + + +#### Step5 + +run +``` +python3 eval_motchallenge.py --groundtruths MOT17/train --tests ctracker/results --gt_type half_val --eval_official --score_threshold -1 +``` + + + +# CTracker_BYTE + +#### Step3 + +add mot_online to https://github.com/pjl1995/CTracker + +add byte_tracker.py to https://github.com/pjl1995/CTracker + +add test_byte.py to https://github.com/pjl1995/CTracker + +run +``` +python3 test_byte.py --dataset_path MOT17 --model_dir ctracker --model_path ctracker/mot17_half_ctracker.pt +``` + + +#### Step5 + +run +``` +python3 eval_motchallenge.py --groundtruths MOT17/train --tests ctracker/results --gt_type half_val --eval_official --score_threshold -1 +``` diff --git a/tutorials/ctracker/byte_tracker.py b/tutorials/ctracker/byte_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..0a6ae80119025c0b9b35419ab4ccb5a107b25c0e --- /dev/null +++ b/tutorials/ctracker/byte_tracker.py @@ -0,0 +1,343 @@ +import numpy as np +from collections import deque +import os +import os.path as osp +import copy +import torch +import torch.nn.functional as F + +from mot_online.kalman_filter import KalmanFilter +from mot_online.basetrack import BaseTrack, TrackState +from mot_online import matching + + + +class STrack(BaseTrack): + shared_kalman = KalmanFilter() + def __init__(self, tlwh, score): + + # wait activate + self._tlwh = np.asarray(tlwh, dtype=np.float) + self.kalman_filter = None + self.mean, self.covariance = None, None + self.is_activated = False + + self.score = score + self.tracklet_len = 0 + + def predict(self): + mean_state = self.mean.copy() + if self.state != TrackState.Tracked: + mean_state[7] = 0 + self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) + + @staticmethod + def multi_predict(stracks): + if len(stracks) > 0: + multi_mean = np.asarray([st.mean.copy() for st in stracks]) + multi_covariance = np.asarray([st.covariance for st in stracks]) + for i, st in enumerate(stracks): + if st.state != TrackState.Tracked: + multi_mean[i][7] = 0 + multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance) + for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): + stracks[i].mean = mean + stracks[i].covariance = cov + + def activate(self, kalman_filter, frame_id): + """Start a new tracklet""" + self.kalman_filter = kalman_filter + self.track_id = self.next_id() + self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) + + self.tracklet_len = 0 + self.state = TrackState.Tracked + if frame_id == 1: + self.is_activated = True + # self.is_activated = True + self.frame_id = frame_id + self.start_frame = frame_id + + def re_activate(self, new_track, frame_id, new_id=False): + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) + ) + self.tracklet_len = 0 + self.state = TrackState.Tracked + self.is_activated = True + self.frame_id = frame_id + if new_id: + self.track_id = self.next_id() + self.score = new_track.score + + def update(self, new_track, frame_id): + """ + Update a matched track + :type new_track: STrack + :type frame_id: int + :type update_feature: bool + :return: + """ + self.frame_id = frame_id + self.tracklet_len += 1 + + new_tlwh = new_track.tlwh + self.mean, self.covariance = self.kalman_filter.update( + 
self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) + self.state = TrackState.Tracked + self.is_activated = True + + self.score = new_track.score + + @property + # @jit(nopython=True) + def tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + """ + if self.mean is None: + return self._tlwh.copy() + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + @property + # @jit(nopython=True) + def tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_xyah(tlwh): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. + """ + ret = np.asarray(tlwh).copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + def to_xyah(self): + return self.tlwh_to_xyah(self.tlwh) + + @staticmethod + # @jit(nopython=True) + def tlbr_to_tlwh(tlbr): + ret = np.asarray(tlbr).copy() + ret[2:] -= ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_tlbr(tlwh): + ret = np.asarray(tlwh).copy() + ret[2:] += ret[:2] + return ret + + def __repr__(self): + return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) + + +class BYTETracker(object): + def __init__(self, frame_rate=30): + self.tracked_stracks = [] # type: list[STrack] + self.lost_stracks = [] # type: list[STrack] + self.removed_stracks = [] # type: list[STrack] + + self.frame_id = 0 + + self.low_thresh = 0.2 + self.track_thresh = 0.4 + self.det_thresh = self.track_thresh + 0.1 + + + self.buffer_size = int(frame_rate / 30.0 * 30) + self.max_time_lost = self.buffer_size + self.kalman_filter = KalmanFilter() + +# def update(self, output_results): + def update(self, det_bboxes, scores): + + self.frame_id += 1 + activated_starcks = [] + refind_stracks = [] + lost_stracks = [] + removed_stracks = [] + +# scores = output_results[:, 4] +# bboxes = output_results[:, :4] # x1y1x2y2 + scores = scores + bboxes = det_bboxes + + remain_inds = scores > self.track_thresh + dets = bboxes[remain_inds] + scores_keep = scores[remain_inds] + + + inds_low = scores > self.low_thresh + inds_high = scores < self.track_thresh + inds_second = np.logical_and(inds_low, inds_high) + dets_second = bboxes[inds_second] + scores_second = scores[inds_second] + + + if len(dets) > 0: + '''Detections''' + detections = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for + (tlbr, s) in zip(dets, scores_keep)] + else: + detections = [] + + ''' Add newly detected tracklets to tracked_stracks''' + unconfirmed = [] + tracked_stracks = [] # type: list[STrack] + for track in self.tracked_stracks: + if not track.is_activated: + unconfirmed.append(track) + else: + tracked_stracks.append(track) + + ''' Step 2: First association, with Kalman and IOU''' + strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) + # Predict the current location with KF + STrack.multi_predict(strack_pool) + dists = matching.iou_distance(strack_pool, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.8) + + for itracked, idet in matches: + track = strack_pool[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(detections[idet], self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + 
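+        # The second, low-score association below is what gives BYTE its name:
+        # detections with score between low_thresh and track_thresh, kept earlier
+        # as dets_second, are matched by IoU (threshold 0.5) against the tracks
+        # that were Kalman-predicted but found no high-score match in Step 2.
+        # Matched pairs update normally; tracks that still match nothing are
+        # marked Lost rather than removed, so they can be re-activated later.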
+ ''' Step 3: Second association, with IOU''' + # association the untrack to the low score detections + if len(dets_second) > 0: + '''Detections''' + detections_second = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for + (tlbr, s) in zip(dets_second, scores_second)] + else: + detections_second = [] + r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] + dists = matching.iou_distance(r_tracked_stracks, detections_second) + matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5) + for itracked, idet in matches: + track = r_tracked_stracks[itracked] + det = detections_second[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + for it in u_track: + #track = strack_pool[it] + track = r_tracked_stracks[it] + if not track.state == TrackState.Lost: + track.mark_lost() + lost_stracks.append(track) + + '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' + detections = [detections[i] for i in u_detection] + dists = matching.iou_distance(unconfirmed, detections) + matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) + for itracked, idet in matches: + unconfirmed[itracked].update(detections[idet], self.frame_id) + activated_starcks.append(unconfirmed[itracked]) + for it in u_unconfirmed: + track = unconfirmed[it] + track.mark_removed() + removed_stracks.append(track) + + """ Step 4: Init new stracks""" + for inew in u_detection: + track = detections[inew] + if track.score < self.det_thresh: + continue + track.activate(self.kalman_filter, self.frame_id) + activated_starcks.append(track) + """ Step 5: Update state""" + for track in self.lost_stracks: + if self.frame_id - track.end_frame > self.max_time_lost: + track.mark_removed() + removed_stracks.append(track) + + # print('Ramained match {} s'.format(t4-t3)) + + self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] + self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) + self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) + self.lost_stracks.extend(lost_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) + self.removed_stracks.extend(removed_stracks) + self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) + # get scores of lost tracks + output_stracks = [track for track in self.tracked_stracks if track.is_activated] + + return output_stracks + + + +def joint_stracks(tlista, tlistb): + exists = {} + res = [] + for t in tlista: + exists[t.track_id] = 1 + res.append(t) + for t in tlistb: + tid = t.track_id + if not exists.get(tid, 0): + exists[tid] = 1 + res.append(t) + return res + + +def sub_stracks(tlista, tlistb): + stracks = {} + for t in tlista: + stracks[t.track_id] = t + for t in tlistb: + tid = t.track_id + if stracks.get(tid, 0): + del stracks[tid] + return list(stracks.values()) + + +def remove_duplicate_stracks(stracksa, stracksb): + pdist = matching.iou_distance(stracksa, stracksb) + pairs = np.where(pdist < 0.15) + dupa, dupb = list(), list() + for p, q in zip(*pairs): + timep = stracksa[p].frame_id - stracksa[p].start_frame + timeq = stracksb[q].frame_id - stracksb[q].start_frame + if timep > timeq: + dupb.append(q) 
+ else: + dupa.append(p) + resa = [t for i, t in enumerate(stracksa) if not i in dupa] + resb = [t for i, t in enumerate(stracksb) if not i in dupb] + return resa, resb + + +def remove_fp_stracks(stracksa, n_frame=10): + remain = [] + for t in stracksa: + score_5 = t.score_list[-n_frame:] + score_5 = np.array(score_5, dtype=np.float32) + index = score_5 < 0.45 + num = np.sum(index) + if num < n_frame: + remain.append(t) + return remain diff --git a/tutorials/ctracker/eval_motchallenge.py b/tutorials/ctracker/eval_motchallenge.py new file mode 100644 index 0000000000000000000000000000000000000000..a2b51388a77bd76bfc16a0ac2740e6fcd3d86aac --- /dev/null +++ b/tutorials/ctracker/eval_motchallenge.py @@ -0,0 +1,122 @@ +"""py-motmetrics - metrics for multiple object tracker (MOT) benchmarking. +Christoph Heindl, 2017 +https://github.com/cheind/py-motmetrics +Modified by Rufeng Zhang +""" + +import argparse +import glob +import os +import logging +import motmetrics as mm +import pandas as pd +from collections import OrderedDict +from pathlib import Path + + +def parse_args(): + parser = argparse.ArgumentParser(description=""" +Compute metrics for trackers using MOTChallenge ground-truth data. +Files +----- +All file content, ground truth and test files, have to comply with the +format described in +Milan, Anton, et al. +"Mot16: A benchmark for multi-object tracking." +arXiv preprint arXiv:1603.00831 (2016). +https://motchallenge.net/ +Structure +--------- +Layout for ground truth data + //gt/gt.txt + //gt/gt.txt + ... +Layout for test data + /.txt + /.txt + ... +Sequences of ground truth and test will be matched according to the `` +string.""", formatter_class=argparse.RawTextHelpFormatter) + + parser.add_argument('--groundtruths', type=str, help='Directory containing ground truth files.') + parser.add_argument('--tests', type=str, help='Directory containing tracker result files') + parser.add_argument('--score_threshold', type=float, help='Score threshold',default=0.5) + parser.add_argument('--gt_type', type=str, default='') + parser.add_argument('--eval_official', action='store_true') + parser.add_argument('--loglevel', type=str, help='Log level', default='info') + parser.add_argument('--fmt', type=str, help='Data format', default='mot15-2D') + parser.add_argument('--solver', type=str, help='LAP solver to use') + return parser.parse_args() + + +def compare_dataframes(gts, ts): + accs = [] + names = [] + for k, tsacc in ts.items(): + if k in gts: + logging.info('Comparing {}...'.format(k)) + accs.append(mm.utils.compare_to_groundtruth(gts[k], tsacc, 'iou', distth=0.5)) + names.append(k) + else: + logging.warning('No ground truth for {}, skipping.'.format(k)) + + return accs, names + + +if __name__ == '__main__': + + args = parse_args() + + loglevel = getattr(logging, args.loglevel.upper(), None) + if not isinstance(loglevel, int): + raise ValueError('Invalid log level: {} '.format(args.loglevel)) + logging.basicConfig(level=loglevel, format='%(asctime)s %(levelname)s - %(message)s', datefmt='%I:%M:%S') + + if args.solver: + mm.lap.default_solver = args.solver + + gt_type = args.gt_type + print('gt_type', gt_type) + gtfiles = glob.glob( + os.path.join(args.groundtruths, '*/gt/gt_{}.txt'.format(gt_type))) + print('gt_files', gtfiles) + tsfiles = [f for f in glob.glob(os.path.join(args.tests, '*.txt')) if not os.path.basename(f).startswith('eval')] + + logging.info('Found {} groundtruths and {} test files.'.format(len(gtfiles), len(tsfiles))) + logging.info('Available LAP solvers 
{}'.format(mm.lap.available_solvers)) + logging.info('Default LAP solver \'{}\''.format(mm.lap.default_solver)) + logging.info('Loading files.') + + gt = OrderedDict([(Path(f).parts[-3], mm.io.loadtxt(f, fmt=args.fmt, min_confidence=1)) for f in gtfiles]) + ts = OrderedDict([(os.path.splitext(Path(f).parts[-1])[0], mm.io.loadtxt(f, fmt=args.fmt, min_confidence=args.score_threshold)) for f in tsfiles]) +# ts = gt + + mh = mm.metrics.create() + accs, names = compare_dataframes(gt, ts) + + logging.info('Running metrics') + metrics = ['recall', 'precision', 'num_unique_objects', 'mostly_tracked', + 'partially_tracked', 'mostly_lost', 'num_false_positives', 'num_misses', + 'num_switches', 'num_fragmentations', 'mota', 'motp', 'num_objects'] + summary = mh.compute_many(accs, names=names, metrics=metrics, generate_overall=True) + # summary = mh.compute_many(accs, names=names, metrics=mm.metrics.motchallenge_metrics, generate_overall=True) + # print(mm.io.render_summary( + # summary, formatters=mh.formatters, + # namemap=mm.io.motchallenge_metric_names)) + div_dict = { + 'num_objects': ['num_false_positives', 'num_misses', 'num_switches', 'num_fragmentations'], + 'num_unique_objects': ['mostly_tracked', 'partially_tracked', 'mostly_lost']} + for divisor in div_dict: + for divided in div_dict[divisor]: + summary[divided] = (summary[divided] / summary[divisor]) + fmt = mh.formatters + change_fmt_list = ['num_false_positives', 'num_misses', 'num_switches', 'num_fragmentations', 'mostly_tracked', + 'partially_tracked', 'mostly_lost'] + for k in change_fmt_list: + fmt[k] = fmt['mota'] + print(mm.io.render_summary(summary, formatters=fmt, namemap=mm.io.motchallenge_metric_names)) + if args.eval_official: + metrics = mm.metrics.motchallenge_metrics + ['num_objects'] + summary = mh.compute_many(accs, names=names, metrics=metrics, generate_overall=True) + print(mm.io.render_summary(summary, formatters=mh.formatters, namemap=mm.io.motchallenge_metric_names)) + logging.info('Completed') diff --git a/tutorials/ctracker/generate_half_csv.py b/tutorials/ctracker/generate_half_csv.py new file mode 100644 index 0000000000000000000000000000000000000000..12ca75bca486e8187cfc45bea50311e8decfdfaf --- /dev/null +++ b/tutorials/ctracker/generate_half_csv.py @@ -0,0 +1,37 @@ +import os +import numpy as np +prefix_dir = 'MOT17/' +root_dir = 'train/' +result_csv = 'train_half_annots.csv' +train_half_set = {2: 301, 4: 526, 5:419, 9:263, 10:328, 11:451, 13:376} +fout = open(result_csv, 'w') + +for data_name in sorted(os.listdir(prefix_dir + root_dir)): + print(data_name) + gt_path = os.path.join(prefix_dir, root_dir, data_name, 'gt', 'gt.txt') + # print(gt_path) + data_raw = np.loadtxt(gt_path, delimiter=',', dtype='float', usecols=(0,1,2,3,4,5,6,7,8)) + + data_sort = data_raw[np.lexsort(data_raw[:,::-1].T)] + visible_raw = data_sort[:,8] + # print(data_sort) + # print(data_sort[-1, 0]) + img_num = data_sort[-1, 0] + + # print(data_sort.shape[0]) + box_num = data_sort.shape[0] + + person_box_num = np.sum(data_sort[:,6] == 1) + # print(person_box_num) +# import ipdb; ipdb.set_trace() + for i in range(box_num): + c = int(data_sort[i, 6]) + v = visible_raw[i] + img_index = int(data_sort[i, 0]) + if c == 1 and v > 0.1 and img_index < train_half_set[int(data_name[-2:])]: + img_index = int(data_sort[i, 0]) + img_name = data_name + '/img1/' + str(img_index).zfill(6) + '.jpg' + print(root_dir + img_name + ', ' + str(int(data_sort[i, 1])) + ', ' + str(data_sort[i, 2]) + ', ' + str(data_sort[i, 3]) + ', ' + str(data_sort[i, 2] + 
data_sort[i, 4]) + ', ' + str(data_sort[i, 3] + data_sort[i, 5]) + ', person\n') + fout.write(root_dir + img_name + ', ' + str(int(data_sort[i, 1])) + ', ' + str(data_sort[i, 2]) + ', ' + str(data_sort[i, 3]) + ', ' + str(data_sort[i, 2] + data_sort[i, 4]) + ', ' + str(data_sort[i, 3] + data_sort[i, 5]) + ', person\n') + +fout.close() diff --git a/tutorials/ctracker/mot_online/basetrack.py b/tutorials/ctracker/mot_online/basetrack.py new file mode 100644 index 0000000000000000000000000000000000000000..4fe2233607f6d4ed28b11a0ae6c0303c8ca19098 --- /dev/null +++ b/tutorials/ctracker/mot_online/basetrack.py @@ -0,0 +1,52 @@ +import numpy as np +from collections import OrderedDict + + +class TrackState(object): + New = 0 + Tracked = 1 + Lost = 2 + Removed = 3 + + +class BaseTrack(object): + _count = 0 + + track_id = 0 + is_activated = False + state = TrackState.New + + history = OrderedDict() + features = [] + curr_feature = None + score = 0 + start_frame = 0 + frame_id = 0 + time_since_update = 0 + + # multi-camera + location = (np.inf, np.inf) + + @property + def end_frame(self): + return self.frame_id + + @staticmethod + def next_id(): + BaseTrack._count += 1 + return BaseTrack._count + + def activate(self, *args): + raise NotImplementedError + + def predict(self): + raise NotImplementedError + + def update(self, *args, **kwargs): + raise NotImplementedError + + def mark_lost(self): + self.state = TrackState.Lost + + def mark_removed(self): + self.state = TrackState.Removed diff --git a/tutorials/ctracker/mot_online/kalman_filter.py b/tutorials/ctracker/mot_online/kalman_filter.py new file mode 100644 index 0000000000000000000000000000000000000000..b4c4e9854d8abd2fea75ad6b1fe8cd6846c43680 --- /dev/null +++ b/tutorials/ctracker/mot_online/kalman_filter.py @@ -0,0 +1,269 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np +import scipy.linalg + +""" +Table for the 0.95 quantile of the chi-square distribution with N degrees of +freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv +function and used as Mahalanobis gating threshold. +""" +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919} + + +class KalmanFilter(object): + """ + A simple Kalman filter for tracking bounding boxes in image space. + + The 8-dimensional state space + + x, y, a, h, vx, vy, va, vh + + contains the bounding box center position (x, y), aspect ratio a, height h, + and their respective velocities. + + Object motion follows a constant velocity model. The bounding box location + (x, y, a, h) is taken as direct observation of the state space (linear + observation model). + + """ + + def __init__(self): + ndim, dt = 4, 1. + + # Create Kalman filter model matrices. + self._motion_mat = np.eye(2 * ndim, 2 * ndim) + for i in range(ndim): + self._motion_mat[i, ndim + i] = dt + self._update_mat = np.eye(ndim, 2 * ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. These weights control the amount of uncertainty in + # the model. This is a bit hacky. + self._std_weight_position = 1. / 20 + self._std_weight_velocity = 1. / 160 + + def initiate(self, measurement): + """Create track from unassociated measurement. + + Parameters + ---------- + measurement : ndarray + Bounding box coordinates (x, y, a, h) with center position (x, y), + aspect ratio a, and height h. 
+ + Returns + ------- + (ndarray, ndarray) + Returns the mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are initialized + to 0 mean. + + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + mean = np.r_[mean_pos, mean_vel] + + std = [ + 2 * self._std_weight_position * measurement[3], + 2 * self._std_weight_position * measurement[3], + 1e-2, + 2 * self._std_weight_position * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 1e-5, + 10 * self._std_weight_velocity * measurement[3]] + covariance = np.diag(np.square(std)) + return mean, covariance + + def predict(self, mean, covariance): + """Run Kalman filter prediction step. + + Parameters + ---------- + mean : ndarray + The 8 dimensional mean vector of the object state at the previous + time step. + covariance : ndarray + The 8x8 dimensional covariance matrix of the object state at the + previous time step. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + + """ + std_pos = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-2, + self._std_weight_position * mean[3]] + std_vel = [ + self._std_weight_velocity * mean[3], + self._std_weight_velocity * mean[3], + 1e-5, + self._std_weight_velocity * mean[3]] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + #mean = np.dot(self._motion_mat, mean) + mean = np.dot(mean, self._motion_mat.T) + covariance = np.linalg.multi_dot(( + self._motion_mat, covariance, self._motion_mat.T)) + motion_cov + + return mean, covariance + + def project(self, mean, covariance): + """Project state distribution to measurement space. + + Parameters + ---------- + mean : ndarray + The state's mean vector (8 dimensional array). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + + Returns + ------- + (ndarray, ndarray) + Returns the projected mean and covariance matrix of the given state + estimate. + + """ + std = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-1, + self._std_weight_position * mean[3]] + innovation_cov = np.diag(np.square(std)) + + mean = np.dot(self._update_mat, mean) + covariance = np.linalg.multi_dot(( + self._update_mat, covariance, self._update_mat.T)) + return mean, covariance + innovation_cov + + def multi_predict(self, mean, covariance): + """Run Kalman filter prediction step (Vectorized version). + Parameters + ---------- + mean : ndarray + The Nx8 dimensional mean matrix of the object states at the previous + time step. + covariance : ndarray + The Nx8x8 dimensional covariance matrics of the object states at the + previous time step. + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. 
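+        Note: this is a batched form of predict(); the same constant-velocity motion model is applied to all N tracks in one matrix operation.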
+ """ + std_pos = [ + self._std_weight_position * mean[:, 3], + self._std_weight_position * mean[:, 3], + 1e-2 * np.ones_like(mean[:, 3]), + self._std_weight_position * mean[:, 3]] + std_vel = [ + self._std_weight_velocity * mean[:, 3], + self._std_weight_velocity * mean[:, 3], + 1e-5 * np.ones_like(mean[:, 3]), + self._std_weight_velocity * mean[:, 3]] + sqr = np.square(np.r_[std_pos, std_vel]).T + + motion_cov = [] + for i in range(len(mean)): + motion_cov.append(np.diag(sqr[i])) + motion_cov = np.asarray(motion_cov) + + mean = np.dot(mean, self._motion_mat.T) + left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) + covariance = np.dot(left, self._motion_mat.T) + motion_cov + + return mean, covariance + + def update(self, mean, covariance, measurement): + """Run Kalman filter correction step. + + Parameters + ---------- + mean : ndarray + The predicted state's mean vector (8 dimensional). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + measurement : ndarray + The 4 dimensional measurement vector (x, y, a, h), where (x, y) + is the center position, a the aspect ratio, and h the height of the + bounding box. + + Returns + ------- + (ndarray, ndarray) + Returns the measurement-corrected state distribution. + + """ + projected_mean, projected_cov = self.project(mean, covariance) + + chol_factor, lower = scipy.linalg.cho_factor( + projected_cov, lower=True, check_finite=False) + kalman_gain = scipy.linalg.cho_solve( + (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, + check_finite=False).T + innovation = measurement - projected_mean + + new_mean = mean + np.dot(innovation, kalman_gain.T) + new_covariance = covariance - np.linalg.multi_dot(( + kalman_gain, projected_cov, kalman_gain.T)) + return new_mean, new_covariance + + def gating_distance(self, mean, covariance, measurements, + only_position=False, metric='maha'): + """Compute gating distance between state distribution and measurements. + A suitable distance threshold can be obtained from `chi2inv95`. If + `only_position` is False, the chi-square distribution has 4 degrees of + freedom, otherwise 2. + Parameters + ---------- + mean : ndarray + Mean vector over the state distribution (8 dimensional). + covariance : ndarray + Covariance of the state distribution (8x8 dimensional). + measurements : ndarray + An Nx4 dimensional matrix of N measurements, each in + format (x, y, a, h) where (x, y) is the bounding box center + position, a the aspect ratio, and h the height. + only_position : Optional[bool] + If True, distance computation is done with respect to the bounding + box center position only. + Returns + ------- + ndarray + Returns an array of length N, where the i-th element contains the + squared Mahalanobis distance between (mean, covariance) and + `measurements[i]`. 
+ """ + mean, covariance = self.project(mean, covariance) + if only_position: + mean, covariance = mean[:2], covariance[:2, :2] + measurements = measurements[:, :2] + + d = measurements - mean + if metric == 'gaussian': + return np.sum(d * d, axis=1) + elif metric == 'maha': + cholesky_factor = np.linalg.cholesky(covariance) + z = scipy.linalg.solve_triangular( + cholesky_factor, d.T, lower=True, check_finite=False, + overwrite_b=True) + squared_maha = np.sum(z * z, axis=0) + return squared_maha + else: + raise ValueError('invalid distance metric') diff --git a/tutorials/ctracker/mot_online/matching.py b/tutorials/ctracker/mot_online/matching.py new file mode 100644 index 0000000000000000000000000000000000000000..54cb4be09624cdb68581508bdbdeecdc63539b7c --- /dev/null +++ b/tutorials/ctracker/mot_online/matching.py @@ -0,0 +1,198 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import lap +import numpy as np +import scipy +from cython_bbox import bbox_overlaps as bbox_ious +from scipy.spatial.distance import cdist + +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919} + +def merge_matches(m1, m2, shape): + O,P,Q = shape + m1 = np.asarray(m1) + m2 = np.asarray(m2) + + M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) + M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) + + mask = M1*M2 + match = mask.nonzero() + match = list(zip(match[0], match[1])) + unmatched_O = tuple(set(range(O)) - set([i for i, j in match])) + unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match])) + + return match, unmatched_O, unmatched_Q + + +def _indices_to_matches(cost_matrix, indices, thresh): + matched_cost = cost_matrix[tuple(zip(*indices))] + matched_mask = (matched_cost <= thresh) + + matches = indices[matched_mask] + unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0])) + unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1])) + + return matches, unmatched_a, unmatched_b + + +def linear_assignment(cost_matrix, thresh): + if cost_matrix.size == 0: + return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) + matches, unmatched_a, unmatched_b = [], [], [] + cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) + for ix, mx in enumerate(x): + if mx >= 0: + matches.append([ix, mx]) + unmatched_a = np.where(x < 0)[0] + unmatched_b = np.where(y < 0)[0] + matches = np.asarray(matches) + return matches, unmatched_a, unmatched_b + + +def ious(atlbrs, btlbrs): + """ + Compute cost based on IoU + :type atlbrs: list[tlbr] | np.ndarray + :type atlbrs: list[tlbr] | np.ndarray + + :rtype ious np.ndarray + """ + ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) + if ious.size == 0: + return ious + + ious = bbox_ious( + np.ascontiguousarray(atlbrs, dtype=np.float), + np.ascontiguousarray(btlbrs, dtype=np.float) + ) + + return ious + + +def iou_distance(atracks, btracks): + """ + Compute cost based on IoU + :type atracks: list[STrack] + :type btracks: list[STrack] + + :rtype cost_matrix np.ndarray + """ + + if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): + atlbrs = atracks + btlbrs = btracks + else: + atlbrs = [track.tlbr for track in atracks] + btlbrs = [track.tlbr for track in btracks] + _ious = ious(atlbrs, btlbrs) 
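+    # IoU is a similarity in [0, 1]; the matching cost used below is its complement, so 0 means perfect overlap.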
+ cost_matrix = 1 - _ious + + return cost_matrix + +def embedding_distance(tracks, detections, metric='cosine'): + """ + :param tracks: list[STrack] + :param detections: list[BaseTrack] + :param metric: + :return: cost_matrix np.ndarray + """ + + cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) + if cost_matrix.size == 0: + return cost_matrix + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + #for i, track in enumerate(tracks): + #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + return cost_matrix + +def embedding_distance2(tracks, detections, metric='cosine'): + """ + :param tracks: list[STrack] + :param detections: list[BaseTrack] + :param metric: + :return: cost_matrix np.ndarray + """ + + cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) + if cost_matrix.size == 0: + return cost_matrix + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + #for i, track in enumerate(tracks): + #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + track_features = np.asarray([track.features[0] for track in tracks], dtype=np.float) + cost_matrix2 = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + track_features = np.asarray([track.features[len(track.features)-1] for track in tracks], dtype=np.float) + cost_matrix3 = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + for row in range(len(cost_matrix)): + cost_matrix[row] = (cost_matrix[row]+cost_matrix2[row]+cost_matrix3[row])/3 + return cost_matrix + + +def vis_id_feature_A_distance(tracks, detections, metric='cosine'): + track_features = [] + det_features = [] + leg1 = len(tracks) + leg2 = len(detections) + cost_matrix = np.zeros((leg1, leg2), dtype=np.float) + cost_matrix_det = np.zeros((leg1, leg2), dtype=np.float) + cost_matrix_track = np.zeros((leg1, leg2), dtype=np.float) + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + if leg2 != 0: + cost_matrix_det = np.maximum(0.0, cdist(det_features, det_features, metric)) + if leg1 != 0: + cost_matrix_track = np.maximum(0.0, cdist(track_features, track_features, metric)) + if cost_matrix.size == 0: + return track_features, det_features, cost_matrix, cost_matrix_det, cost_matrix_track + cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) + if leg1 > 10: + leg1 = 10 + tracks = tracks[:10] + if leg2 > 10: + leg2 = 10 + detections = detections[:10] + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + return track_features, det_features, cost_matrix, cost_matrix_det, cost_matrix_track + +def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = chi2inv95[gating_dim] + 
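+    # Project detections to (x, y, a, h) measurement space; any track/detection pair whose squared Mahalanobis distance exceeds the 0.95 chi-square quantile is made infeasible.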
measurements = np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position) + cost_matrix[row, gating_distance > gating_threshold] = np.inf + return cost_matrix + + +def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = chi2inv95[gating_dim] + measurements = np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position, metric='maha') + cost_matrix[row, gating_distance > gating_threshold] = np.inf + cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance + return cost_matrix diff --git a/tutorials/ctracker/test.py b/tutorials/ctracker/test.py new file mode 100644 index 0000000000000000000000000000000000000000..772d9169975cd51f4aad5830fde363f776e97b4b --- /dev/null +++ b/tutorials/ctracker/test.py @@ -0,0 +1,337 @@ +import numpy as np +import torchvision +import time +import math +import os +import copy +import pdb +import argparse +import sys +import cv2 +import skimage.io +import skimage.transform +import skimage.color +import skimage +import torch +import model + +from torch.utils.data import Dataset, DataLoader +from torchvision import datasets, models, transforms +from dataloader import CSVDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, UnNormalizer, Normalizer, RGB_MEAN, RGB_STD +from scipy.optimize import linear_sum_assignment + +# assert torch.__version__.split('.')[1] == '4' + +print('CUDA available: {}'.format(torch.cuda.is_available())) + +color_list = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 0, 255), (0, 255, 255), (255, 255, 0), (128, 0, 255), +(0, 128, 255), (128, 255, 0), (0, 255, 128), (255, 128, 0), (255, 0, 128), (128, 128, 255), (128, 255, 128), (255, 128, 128), (128, 128, 0), (128, 0, 128)] + +class detect_rect: + def __init__(self): + self.curr_frame = 0 + self.curr_rect = np.array([0, 0, 1, 1]) + self.next_rect = np.array([0, 0, 1, 1]) + self.conf = 0 + self.id = 0 + + @property + def position(self): + x = (self.curr_rect[0] + self.curr_rect[2])/2 + y = (self.curr_rect[1] + self.curr_rect[3])/2 + return np.array([x, y]) + + @property + def size(self): + w = self.curr_rect[2] - self.curr_rect[0] + h = self.curr_rect[3] - self.curr_rect[1] + return np.array([w, h]) + +class tracklet: + def __init__(self, det_rect): + self.id = det_rect.id + self.rect_list = [det_rect] + self.rect_num = 1 + self.last_rect = det_rect + self.last_frame = det_rect.curr_frame + self.no_match_frame = 0 + + def add_rect(self, det_rect): + self.rect_list.append(det_rect) + self.rect_num = self.rect_num + 1 + self.last_rect = det_rect + self.last_frame = det_rect.curr_frame + + @property + def velocity(self): + if(self.rect_num < 2): + return (0, 0) + elif(self.rect_num < 6): + return (self.rect_list[self.rect_num - 1].position - self.rect_list[self.rect_num - 2].position) / (self.rect_list[self.rect_num - 1].curr_frame - self.rect_list[self.rect_num - 2].curr_frame) + else: + v1 = (self.rect_list[self.rect_num - 1].position - self.rect_list[self.rect_num - 4].position) / (self.rect_list[self.rect_num - 1].curr_frame - self.rect_list[self.rect_num - 4].curr_frame) + v2 = (self.rect_list[self.rect_num - 2].position - self.rect_list[self.rect_num - 
5].position) / (self.rect_list[self.rect_num - 2].curr_frame - self.rect_list[self.rect_num - 5].curr_frame) + v3 = (self.rect_list[self.rect_num - 3].position - self.rect_list[self.rect_num - 6].position) / (self.rect_list[self.rect_num - 3].curr_frame - self.rect_list[self.rect_num - 6].curr_frame) + return (v1 + v2 + v3) / 3 + + +def cal_iou(rect1, rect2): + x1, y1, x2, y2 = rect1 + x3, y3, x4, y4 = rect2 + i_w = min(x2, x4) - max(x1, x3) + i_h = min(y2, y4) - max(y1, y3) + if(i_w <= 0 or i_h <= 0): + return 0 + i_s = i_w * i_h + s_1 = (x2 - x1) * (y2 - y1) + s_2 = (x4 - x3) * (y4 - y3) + return float(i_s) / (s_1 + s_2 - i_s) + +def cal_simi(det_rect1, det_rect2): + return cal_iou(det_rect1.next_rect, det_rect2.curr_rect) + +def cal_simi_track_det(track, det_rect): + if(det_rect.curr_frame <= track.last_frame): + print("cal_simi_track_det error") + return 0 + elif(det_rect.curr_frame - track.last_frame == 1): + return cal_iou(track.last_rect.next_rect, det_rect.curr_rect) + else: + pred_rect = track.last_rect.curr_rect + np.append(track.velocity, track.velocity) * (det_rect.curr_frame - track.last_frame) + return cal_iou(pred_rect, det_rect.curr_rect) + +def track_det_match(tracklet_list, det_rect_list, min_iou = 0.5): + num1 = len(tracklet_list) + num2 = len(det_rect_list) + cost_mat = np.zeros((num1, num2)) + for i in range(num1): + for j in range(num2): + cost_mat[i, j] = -cal_simi_track_det(tracklet_list[i], det_rect_list[j]) + + match_result = linear_sum_assignment(cost_mat) + match_result = np.asarray(match_result) + match_result = np.transpose(match_result) + + matches, unmatched1, unmatched2 = [], [], [] + for i in range(num1): + if i not in match_result[:, 0]: + unmatched1.append(i) + for j in range(num2): + if j not in match_result[:, 1]: + unmatched2.append(j) + for i, j in match_result: + if cost_mat[i, j] > -min_iou: + unmatched1.append(i) + unmatched2.append(j) + else: + matches.append((i, j)) + return matches, unmatched1, unmatched2 + +def draw_caption(image, box, caption, color): + b = np.array(box).astype(int) + cv2.putText(image, caption, (b[0], b[1] - 8), cv2.FONT_HERSHEY_PLAIN, 2, color, 2) + + +def run_each_dataset(model_dir, retinanet, dataset_path, subset, cur_dataset): + print(cur_dataset) + + img_list = os.listdir(os.path.join(dataset_path, subset, cur_dataset, 'img1')) + img_list = [os.path.join(dataset_path, subset, cur_dataset, 'img1', _) for _ in img_list if ('jpg' in _) or ('png' in _)] + img_list = sorted(img_list) + + img_len = len(img_list) + last_feat = None + + confidence_threshold = 0.4 + IOU_threshold = 0.5 + retention_threshold = 10 + + det_list_all = [] + tracklet_all = [] + max_id = 0 + max_draw_len = 100 + draw_interval = 5 + img_width = 1920 + img_height = 1080 + fps = 30 + + for i in range(img_len): + det_list_all.append([]) + + for idx in range((int(img_len / 2)), img_len + 1): + i = idx - 1 + print('tracking: ', i) + with torch.no_grad(): + data_path1 = img_list[min(idx, img_len - 1)] + img_origin1 = skimage.io.imread(data_path1) + img_h, img_w, _ = img_origin1.shape + img_height, img_width = img_h, img_w + resize_h, resize_w = math.ceil(img_h / 32) * 32, math.ceil(img_w / 32) * 32 + img1 = np.zeros((resize_h, resize_w, 3), dtype=img_origin1.dtype) + img1[:img_h, :img_w, :] = img_origin1 + img1 = (img1.astype(np.float32) / 255.0 - np.array([[RGB_MEAN]])) / np.array([[RGB_STD]]) + img1 = torch.from_numpy(img1).permute(2, 0, 1).view(1, 3, resize_h, resize_w) + scores, transformed_anchors, last_feat = retinanet(img1.cuda().float(), 
last_feat=last_feat) +# if idx > 0: + if idx > (int(img_len / 2)): + idxs = np.where(scores>0.1) + + for j in range(idxs[0].shape[0]): + bbox = transformed_anchors[idxs[0][j], :] + x1 = int(bbox[0]) + y1 = int(bbox[1]) + x2 = int(bbox[2]) + y2 = int(bbox[3]) + + x3 = int(bbox[4]) + y3 = int(bbox[5]) + x4 = int(bbox[6]) + y4 = int(bbox[7]) + + det_conf = float(scores[idxs[0][j]]) + + det_rect = detect_rect() + det_rect.curr_frame = idx + det_rect.curr_rect = np.array([x1, y1, x2, y2]) + det_rect.next_rect = np.array([x3, y3, x4, y4]) + det_rect.conf = det_conf + + if det_rect.conf > confidence_threshold: + det_list_all[det_rect.curr_frame - 1].append(det_rect) +# if i == 0: + if i == int(img_len / 2): + for j in range(len(det_list_all[i])): + det_list_all[i][j].id = j + 1 + max_id = max(max_id, j + 1) + track = tracklet(det_list_all[i][j]) + tracklet_all.append(track) + continue + + matches, unmatched1, unmatched2 = track_det_match(tracklet_all, det_list_all[i], IOU_threshold) + + for j in range(len(matches)): + det_list_all[i][matches[j][1]].id = tracklet_all[matches[j][0]].id + det_list_all[i][matches[j][1]].id = tracklet_all[matches[j][0]].id + tracklet_all[matches[j][0]].add_rect(det_list_all[i][matches[j][1]]) + + delete_track_list = [] + for j in range(len(unmatched1)): + tracklet_all[unmatched1[j]].no_match_frame = tracklet_all[unmatched1[j]].no_match_frame + 1 + if(tracklet_all[unmatched1[j]].no_match_frame >= retention_threshold): + delete_track_list.append(unmatched1[j]) + + origin_index = set([k for k in range(len(tracklet_all))]) + delete_index = set(delete_track_list) + left_index = list(origin_index - delete_index) + tracklet_all = [tracklet_all[k] for k in left_index] + + + for j in range(len(unmatched2)): + det_list_all[i][unmatched2[j]].id = max_id + 1 + max_id = max_id + 1 + track = tracklet(det_list_all[i][unmatched2[j]]) + tracklet_all.append(track) + + + + #**************visualize tracking result and save evaluate file**************** + + fout_tracking = open(os.path.join(model_dir, 'results', cur_dataset + '.txt'), 'w') + + save_img_dir = os.path.join(model_dir, 'results', cur_dataset) + if not os.path.exists(save_img_dir): + os.makedirs(save_img_dir) + + out_video = os.path.join(model_dir, 'results', cur_dataset + '.mp4') + videoWriter = cv2.VideoWriter(out_video, cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, (img_width, img_height)) + + id_dict = {} + + + for i in range((int(img_len / 2)), img_len): + print('saving: ', i) + img = cv2.imread(img_list[i]) + + for j in range(len(det_list_all[i])): + + x1, y1, x2, y2 = det_list_all[i][j].curr_rect.astype(int) + trace_id = det_list_all[i][j].id + + id_dict.setdefault(str(trace_id),[]).append((int((x1+x2)/2), y2)) + draw_trace_id = str(trace_id) + draw_caption(img, (x1, y1, x2, y2), draw_trace_id, color=color_list[trace_id % len(color_list)]) + cv2.rectangle(img, (x1, y1), (x2, y2), color=color_list[trace_id % len(color_list)], thickness=2) + + trace_len = len(id_dict[str(trace_id)]) + trace_len_draw = min(max_draw_len, trace_len) + + for k in range(trace_len_draw - draw_interval): + if(k % draw_interval == 0): + draw_point1 = id_dict[str(trace_id)][trace_len - k - 1] + draw_point2 = id_dict[str(trace_id)][trace_len - k - 1 - draw_interval] + cv2.line(img, draw_point1, draw_point2, color=color_list[trace_id % len(color_list)], thickness=2) + + fout_tracking.write(str(i+1) + ',' + str(trace_id) + ',' + str(x1) + ',' + str(y1) + ',' + str(x2 - x1) + ',' + str(y2 - y1) + ',-1,-1,-1,-1\n') + + 
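+            # save the annotated frame to disk and append it to the output video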
cv2.imwrite(os.path.join(save_img_dir, str(i + 1).zfill(6) + '.jpg'), img) + videoWriter.write(img) +# cv2.waitKey(0) + + fout_tracking.close() + videoWriter.release() + +def run_from_train(model_dir, root_path): + if not os.path.exists(os.path.join(model_dir, 'results')): + os.makedirs(os.path.join(model_dir, 'results')) + retinanet = torch.load(os.path.join(model_dir, 'model_final.pt')) + + use_gpu = True + + if use_gpu: retinanet = retinanet.cuda() + + retinanet.eval() + + for seq_num in [2, 4, 5, 9, 10, 11, 13]: + run_each_dataset(model_dir, retinanet, root_path, 'train', 'MOT17-{:02d}'.format(seq_num)) + for seq_num in [1, 3, 6, 7, 8, 12, 14]: + run_each_dataset(model_dir, retinanet, root_path, 'test', 'MOT17-{:02d}'.format(seq_num)) + +def main(args=None): + parser = argparse.ArgumentParser(description='Simple script for testing a CTracker network.') + parser.add_argument('--dataset_path', default='/dockerdata/home/jeromepeng/data/MOT/MOT17/', type=str, help='Dataset path, location of the images sequence.') + parser.add_argument('--model_dir', default='./trained_model/', help='Path to model (.pt) file.') + parser.add_argument('--model_path', default='./trained_model/model_final.pth', help='Path to model (.pt) file.') + parser = parser.parse_args(args) + + if not os.path.exists(os.path.join(parser.model_dir, 'results')): + os.makedirs(os.path.join(parser.model_dir, 'results')) + + retinanet = model.resnet50(num_classes=1, pretrained=True) +# retinanet_save = torch.load(os.path.join(parser.model_dir, 'model_final.pth')) + retinanet_save = torch.load(os.path.join(parser.model_path)) + + # rename moco pre-trained keys + state_dict = retinanet_save.state_dict() + for k in list(state_dict.keys()): + # retain only encoder up to before the embedding layer + if k.startswith('module.'): + # remove prefix + state_dict[k[len("module."):]] = state_dict[k] + # delete renamed or unused k + del state_dict[k] + + retinanet.load_state_dict(state_dict) + + use_gpu = True + + if use_gpu: retinanet = retinanet.cuda() + + retinanet.eval() + + for seq_num in [2, 4, 5, 9, 10, 11, 13]: + run_each_dataset(parser.model_dir, retinanet, parser.dataset_path, 'train', 'MOT17-{:02d}'.format(seq_num)) +# for seq_num in [1, 3, 6, 7, 8, 12, 14]: +# run_each_dataset(parser.model_dir, retinanet, parser.dataset_path, 'test', 'MOT17-{:02d}'.format(seq_num)) + +if __name__ == '__main__': + main() diff --git a/tutorials/ctracker/test_byte.py b/tutorials/ctracker/test_byte.py new file mode 100644 index 0000000000000000000000000000000000000000..bbb8a53b7a98de5e1c4c5fcffa1a546cc36f0e4b --- /dev/null +++ b/tutorials/ctracker/test_byte.py @@ -0,0 +1,156 @@ +import numpy as np +import torchvision +import time +import math +import os +import copy +import pdb +import argparse +import sys +import cv2 +import skimage.io +import skimage.transform +import skimage.color +import skimage +import torch +import model + +from torch.utils.data import Dataset, DataLoader +from torchvision import datasets, models, transforms +from dataloader import CSVDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, UnNormalizer, Normalizer, RGB_MEAN, RGB_STD +from scipy.optimize import linear_sum_assignment +from tracker import BYTETracker + + +def write_results(filename, results): + save_format = '{frame},{id},{x1},{y1},{w},{h},{s},-1,-1,-1\n' + with open(filename, 'w') as f: + for frame_id, tlwhs, track_ids, scores in results: + for tlwh, track_id, score in zip(tlwhs, track_ids, scores): + if track_id < 0: + continue + x1, y1, w, h = tlwh + 
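+                # MOTChallenge text format: frame,id,x1,y1,w,h,score,-1,-1,-1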
line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1), h=round(h, 1), s=round(score, 2)) + f.write(line) + +def write_results_no_score(filename, results): + save_format = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\n' + with open(filename, 'w') as f: + for frame_id, tlwhs, track_ids in results: + for tlwh, track_id in zip(tlwhs, track_ids): + if track_id < 0: + continue + x1, y1, w, h = tlwh + line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1), h=round(h, 1)) + f.write(line) + +def run_each_dataset(model_dir, retinanet, dataset_path, subset, cur_dataset): + print(cur_dataset) + + img_list = os.listdir(os.path.join(dataset_path, subset, cur_dataset, 'img1')) + img_list = [os.path.join(dataset_path, subset, cur_dataset, 'img1', _) for _ in img_list if ('jpg' in _) or ('png' in _)] + img_list = sorted(img_list) + + img_len = len(img_list) + last_feat = None + + confidence_threshold = 0.6 + IOU_threshold = 0.5 + retention_threshold = 10 + + det_list_all = [] + tracklet_all = [] + results = [] + max_id = 0 + max_draw_len = 100 + draw_interval = 5 + img_width = 1920 + img_height = 1080 + fps = 30 + + tracker = BYTETracker() + + for idx in range((int(img_len / 2)), img_len + 1): + i = idx - 1 + print('tracking: ', i) + with torch.no_grad(): + data_path1 = img_list[min(idx, img_len - 1)] + img_origin1 = skimage.io.imread(data_path1) + img_h, img_w, _ = img_origin1.shape + img_height, img_width = img_h, img_w + resize_h, resize_w = math.ceil(img_h / 32) * 32, math.ceil(img_w / 32) * 32 + img1 = np.zeros((resize_h, resize_w, 3), dtype=img_origin1.dtype) + img1[:img_h, :img_w, :] = img_origin1 + img1 = (img1.astype(np.float32) / 255.0 - np.array([[RGB_MEAN]])) / np.array([[RGB_STD]]) + img1 = torch.from_numpy(img1).permute(2, 0, 1).view(1, 3, resize_h, resize_w) + scores, transformed_anchors, last_feat = retinanet(img1.cuda().float(), last_feat=last_feat) + + if idx > (int(img_len / 2)): + idxs = np.where(scores > 0.1) + # run tracking + online_targets = tracker.update(transformed_anchors[idxs[0], :4], scores[idxs[0]]) + online_tlwhs = [] + online_ids = [] + online_scores = [] + for t in online_targets: + tlwh = t.tlwh + tid = t.track_id + online_tlwhs.append(tlwh) + online_ids.append(tid) + online_scores.append(t.score) + results.append((idx, online_tlwhs, online_ids, online_scores)) + + fout_tracking = os.path.join(model_dir, 'results', cur_dataset + '.txt') + write_results(fout_tracking, results) + + + +def main(args=None): + parser = argparse.ArgumentParser(description='Simple script for testing a CTracker network.') + parser.add_argument('--dataset_path', default='/dockerdata/home/jeromepeng/data/MOT/MOT17/', type=str, + help='Dataset path, location of the images sequence.') + parser.add_argument('--model_dir', default='./trained_model/', help='Path to model (.pt) file.') + parser.add_argument('--model_path', default='./trained_model/model_final.pth', help='Path to model (.pt) file.') + parser.add_argument('--seq_nums', default=0, type=int) + + parser = parser.parse_args(args) + + if not os.path.exists(os.path.join(parser.model_dir, 'results')): + os.makedirs(os.path.join(parser.model_dir, 'results')) + + retinanet = model.resnet50(num_classes=1, pretrained=True) + # retinanet_save = torch.load(os.path.join(parser.model_dir, 'model_final.pth')) + retinanet_save = torch.load(os.path.join(parser.model_path)) + + # rename moco pre-trained keys + state_dict = retinanet_save.state_dict() + for k in 
list(state_dict.keys()): + # retain only encoder up to before the embedding layer + if k.startswith('module.'): + # remove prefix + state_dict[k[len("module."):]] = state_dict[k] + # delete renamed or unused k + del state_dict[k] + + retinanet.load_state_dict(state_dict) + + use_gpu = True + + if use_gpu: retinanet = retinanet.cuda() + + retinanet.eval() + seq_nums = [] + if parser.seq_nums > 0: + seq_nums.append(parser.seq_nums) + else: + seq_nums = [2, 4, 5, 9, 10, 11, 13] + + for seq_num in seq_nums: + run_each_dataset(parser.model_dir, retinanet, parser.dataset_path, 'train', 'MOT17-{:02d}'.format(seq_num)) + + +# for seq_num in [1, 3, 6, 7, 8, 12, 14]: +# run_each_dataset(parser.model_dir, retinanet, parser.dataset_path, 'test', 'MOT17-{:02d}'.format(seq_num)) + +if __name__ == '__main__': + main() diff --git a/tutorials/fairmot/README.md b/tutorials/fairmot/README.md new file mode 100644 index 0000000000000000000000000000000000000000..28d18577464ca29dccd75144d19e9e1810e60519 --- /dev/null +++ b/tutorials/fairmot/README.md @@ -0,0 +1,20 @@ +# FairMOT + +Step1. git clone https://github.com/ifzhang/FairMOT.git + + +Step2. replace https://github.com/ifzhang/FairMOT/blob/master/src/lib/tracker/multitracker.py + + +Step3. run motion + reid tracker using tracker.py (set --match_thres 0.4), run BYTE tracker using byte_tracker.py (set --match_thres 0.8) + +run BYTE tracker example: +``` +python3 track_half.py mot --load_model ../exp/mot/mot17_half_dla34/model_last.pth --match_thres 0.8 +``` + + +## Notes +byte_tracker: only motion + +tracker: motion + reid diff --git a/tutorials/fairmot/byte_tracker.py b/tutorials/fairmot/byte_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..7bb384dcb7e09c3f17f87860ee5d30f48e18ba9d --- /dev/null +++ b/tutorials/fairmot/byte_tracker.py @@ -0,0 +1,403 @@ +import numpy as np +from collections import deque +import itertools +import os +import os.path as osp +import time +import torch +import cv2 +import torch.nn.functional as F + +from models.model import create_model, load_model +from models.decode import mot_decode +from tracking_utils.utils import * +from tracking_utils.log import logger +from tracking_utils.kalman_filter import KalmanFilter +from models import * +from tracker import matching +from .basetrack import BaseTrack, TrackState +from utils.post_process import ctdet_post_process +from utils.image import get_affine_transform +from models.utils import _tranpose_and_gather_feat + +class STrack(BaseTrack): + shared_kalman = KalmanFilter() + def __init__(self, tlwh, score): + + # wait activate + self._tlwh = np.asarray(tlwh, dtype=np.float) + self.kalman_filter = None + self.mean, self.covariance = None, None + self.is_activated = False + + self.score = score + self.tracklet_len = 0 + + def predict(self): + mean_state = self.mean.copy() + if self.state != TrackState.Tracked: + mean_state[7] = 0 + self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) + + @staticmethod + def multi_predict(stracks): + if len(stracks) > 0: + multi_mean = np.asarray([st.mean.copy() for st in stracks]) + multi_covariance = np.asarray([st.covariance for st in stracks]) + for i, st in enumerate(stracks): + if st.state != TrackState.Tracked: + multi_mean[i][7] = 0 + multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance) + for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): + stracks[i].mean = mean + stracks[i].covariance = cov + + def activate(self, kalman_filter, 
frame_id): + """Start a new tracklet""" + self.kalman_filter = kalman_filter + self.track_id = self.next_id() + self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) + + self.tracklet_len = 0 + self.state = TrackState.Tracked + if frame_id == 1: + self.is_activated = True + #self.is_activated = True + self.frame_id = frame_id + self.start_frame = frame_id + + def re_activate(self, new_track, frame_id, new_id=False): + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) + ) + + self.tracklet_len = 0 + self.state = TrackState.Tracked + self.is_activated = True + self.frame_id = frame_id + if new_id: + self.track_id = self.next_id() + self.score = new_track.score + + def update(self, new_track, frame_id): + """ + Update a matched track + :type new_track: STrack + :type frame_id: int + :type update_feature: bool + :return: + """ + self.frame_id = frame_id + self.tracklet_len += 1 + + new_tlwh = new_track.tlwh + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) + self.state = TrackState.Tracked + self.is_activated = True + + self.score = new_track.score + + @property + # @jit(nopython=True) + def tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + """ + if self.mean is None: + return self._tlwh.copy() + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + @property + # @jit(nopython=True) + def tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_xyah(tlwh): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. 
+ """ + ret = np.asarray(tlwh).copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + def to_xyah(self): + return self.tlwh_to_xyah(self.tlwh) + + @staticmethod + # @jit(nopython=True) + def tlbr_to_tlwh(tlbr): + ret = np.asarray(tlbr).copy() + ret[2:] -= ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_tlbr(tlwh): + ret = np.asarray(tlwh).copy() + ret[2:] += ret[:2] + return ret + + def __repr__(self): + return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) + + +class BYTETracker(object): + def __init__(self, opt, frame_rate=30): + self.opt = opt + if opt.gpus[0] >= 0: + opt.device = torch.device('cuda') + else: + opt.device = torch.device('cpu') + print('Creating model...') + self.model = create_model(opt.arch, opt.heads, opt.head_conv) + self.model = load_model(self.model, opt.load_model) + self.model = self.model.to(opt.device) + self.model.eval() + + self.tracked_stracks = [] # type: list[STrack] + self.lost_stracks = [] # type: list[STrack] + self.removed_stracks = [] # type: list[STrack] + + self.frame_id = 0 + #self.det_thresh = opt.conf_thres + self.det_thresh = opt.conf_thres + 0.1 + self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer) + self.max_time_lost = self.buffer_size + self.max_per_image = opt.K + self.mean = np.array(opt.mean, dtype=np.float32).reshape(1, 1, 3) + self.std = np.array(opt.std, dtype=np.float32).reshape(1, 1, 3) + + self.kalman_filter = KalmanFilter() + + def post_process(self, dets, meta): + dets = dets.detach().cpu().numpy() + dets = dets.reshape(1, -1, dets.shape[2]) + dets = ctdet_post_process( + dets.copy(), [meta['c']], [meta['s']], + meta['out_height'], meta['out_width'], self.opt.num_classes) + for j in range(1, self.opt.num_classes + 1): + dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 5) + return dets[0] + + def merge_outputs(self, detections): + results = {} + for j in range(1, self.opt.num_classes + 1): + results[j] = np.concatenate( + [detection[j] for detection in detections], axis=0).astype(np.float32) + + scores = np.hstack( + [results[j][:, 4] for j in range(1, self.opt.num_classes + 1)]) + if len(scores) > self.max_per_image: + kth = len(scores) - self.max_per_image + thresh = np.partition(scores, kth)[kth] + for j in range(1, self.opt.num_classes + 1): + keep_inds = (results[j][:, 4] >= thresh) + results[j] = results[j][keep_inds] + return results + + def update(self, im_blob, img0): + self.frame_id += 1 + activated_starcks = [] + refind_stracks = [] + lost_stracks = [] + removed_stracks = [] + + width = img0.shape[1] + height = img0.shape[0] + inp_height = im_blob.shape[2] + inp_width = im_blob.shape[3] + c = np.array([width / 2., height / 2.], dtype=np.float32) + s = max(float(inp_width) / float(inp_height) * height, width) * 1.0 + meta = {'c': c, 's': s, + 'out_height': inp_height // self.opt.down_ratio, + 'out_width': inp_width // self.opt.down_ratio} + + ''' Step 1: Network forward, get detections & embeddings''' + with torch.no_grad(): + output = self.model(im_blob)[-1] + hm = output['hm'].sigmoid_() + wh = output['wh'] + + reg = output['reg'] if self.opt.reg_offset else None + dets, inds = mot_decode(hm, wh, reg=reg, ltrb=self.opt.ltrb, K=self.opt.K) + + dets = self.post_process(dets, meta) + dets = self.merge_outputs([dets])[1] + + remain_inds = dets[:, 4] > self.opt.conf_thres + inds_low = dets[:, 4] > 0.2 + inds_high = dets[:, 4] < self.opt.conf_thres + inds_second = np.logical_and(inds_low, inds_high) + dets_second = dets[inds_second] + 
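+        # BYTE keeps the low-score detections (0.2 < score < conf_thres) for a second association pass instead of discarding them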
dets = dets[remain_inds] + + if len(dets) > 0: + '''Detections''' + detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4]) for + tlbrs in dets[:, :5]] + else: + detections = [] + + ''' Add newly detected tracklets to tracked_stracks''' + unconfirmed = [] + tracked_stracks = [] # type: list[STrack] + for track in self.tracked_stracks: + if not track.is_activated: + unconfirmed.append(track) + else: + tracked_stracks.append(track) + + ''' Step 2: First association, with IOU''' + strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) + # Predict the current location with KF + STrack.multi_predict(strack_pool) + dists = matching.iou_distance(strack_pool, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.opt.match_thres) + + for itracked, idet in matches: + track = strack_pool[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(detections[idet], self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + # association the untrack to the low score detections + if len(dets_second) > 0: + '''Detections''' + detections_second = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4]) for + tlbrs in dets_second[:, :5]] + else: + detections_second = [] + r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] + dists = matching.iou_distance(r_tracked_stracks, detections_second) + matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.4) + for itracked, idet in matches: + track = r_tracked_stracks[itracked] + det = detections_second[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + for it in u_track: + track = r_tracked_stracks[it] + if not track.state == TrackState.Lost: + track.mark_lost() + lost_stracks.append(track) + + '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' + detections = [detections[i] for i in u_detection] + dists = matching.iou_distance(unconfirmed, detections) + matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) + for itracked, idet in matches: + unconfirmed[itracked].update(detections[idet], self.frame_id) + activated_starcks.append(unconfirmed[itracked]) + for it in u_unconfirmed: + track = unconfirmed[it] + track.mark_removed() + removed_stracks.append(track) + + """ Step 4: Init new stracks""" + for inew in u_detection: + track = detections[inew] + if track.score < self.det_thresh: + continue + track.activate(self.kalman_filter, self.frame_id) + activated_starcks.append(track) + """ Step 5: Update state""" + for track in self.lost_stracks: + if self.frame_id - track.end_frame > self.max_time_lost: + track.mark_removed() + removed_stracks.append(track) + + # print('Ramained match {} s'.format(t4-t3)) + + self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] + self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) + self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) + self.lost_stracks.extend(lost_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) + self.removed_stracks.extend(removed_stracks) + self.tracked_stracks, 
self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) + #self.tracked_stracks = remove_fp_stracks(self.tracked_stracks) + # get scores of lost tracks + output_stracks = [track for track in self.tracked_stracks if track.is_activated] + + logger.debug('===========Frame {}=========='.format(self.frame_id)) + logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks])) + logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks])) + logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks])) + logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks])) + + return output_stracks + + +def joint_stracks(tlista, tlistb): + exists = {} + res = [] + for t in tlista: + exists[t.track_id] = 1 + res.append(t) + for t in tlistb: + tid = t.track_id + if not exists.get(tid, 0): + exists[tid] = 1 + res.append(t) + return res + + +def sub_stracks(tlista, tlistb): + stracks = {} + for t in tlista: + stracks[t.track_id] = t + for t in tlistb: + tid = t.track_id + if stracks.get(tid, 0): + del stracks[tid] + return list(stracks.values()) + + +def remove_duplicate_stracks(stracksa, stracksb): + pdist = matching.iou_distance(stracksa, stracksb) + pairs = np.where(pdist < 0.15) + dupa, dupb = list(), list() + for p, q in zip(*pairs): + timep = stracksa[p].frame_id - stracksa[p].start_frame + timeq = stracksb[q].frame_id - stracksb[q].start_frame + if timep > timeq: + dupb.append(q) + else: + dupa.append(p) + resa = [t for i, t in enumerate(stracksa) if not i in dupa] + resb = [t for i, t in enumerate(stracksb) if not i in dupb] + return resa, resb + + +def remove_fp_stracks(stracksa, n_frame=10): + remain = [] + for t in stracksa: + score_5 = t.score_list[-n_frame:] + score_5 = np.array(score_5, dtype=np.float32) + index = score_5 < 0.45 + num = np.sum(index) + if num < n_frame: + remain.append(t) + return remain diff --git a/tutorials/fairmot/tracker.py b/tutorials/fairmot/tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..b3af90ee066585c846735914cd7bb50ede767e2d --- /dev/null +++ b/tutorials/fairmot/tracker.py @@ -0,0 +1,465 @@ +import numpy as np +from collections import deque +import itertools +import os +import os.path as osp +import time +import torch +import cv2 +import torch.nn.functional as F + +from models.model import create_model, load_model +from models.decode import mot_decode +from tracking_utils.utils import * +from tracking_utils.log import logger +from tracking_utils.kalman_filter import KalmanFilter +from models import * +from tracker import matching +from .basetrack import BaseTrack, TrackState +from utils.post_process import ctdet_post_process +from utils.image import get_affine_transform +from models.utils import _tranpose_and_gather_feat + +class STrack(BaseTrack): + shared_kalman = KalmanFilter() + def __init__(self, tlwh, score, temp_feat, buffer_size=30): + + # wait activate + self._tlwh = np.asarray(tlwh, dtype=np.float) + self.kalman_filter = None + self.mean, self.covariance = None, None + self.is_activated = False + + self.score = score + self.score_list = [] + self.tracklet_len = 0 + + self.smooth_feat = None + self.update_features(temp_feat) + self.features = deque([], maxlen=buffer_size) + self.alpha = 0.9 + + def update_features(self, feat): + feat /= np.linalg.norm(feat) + self.curr_feat = feat + if self.smooth_feat is None: + self.smooth_feat = feat + else: + self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat + 
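+        # keep the raw embedding history in a bounded deque alongside the exponentially smoothed feature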
self.features.append(feat) + self.smooth_feat /= np.linalg.norm(self.smooth_feat) + + def predict(self): + mean_state = self.mean.copy() + if self.state != TrackState.Tracked: + mean_state[7] = 0 + self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) + + @staticmethod + def multi_predict(stracks): + if len(stracks) > 0: + multi_mean = np.asarray([st.mean.copy() for st in stracks]) + multi_covariance = np.asarray([st.covariance for st in stracks]) + for i, st in enumerate(stracks): + if st.state != TrackState.Tracked: + multi_mean[i][7] = 0 + multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance) + for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): + stracks[i].mean = mean + stracks[i].covariance = cov + + def activate(self, kalman_filter, frame_id): + """Start a new tracklet""" + self.kalman_filter = kalman_filter + self.track_id = self.next_id() + self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) + + self.tracklet_len = 0 + self.state = TrackState.Tracked + if frame_id == 1: + self.is_activated = True + #self.is_activated = True + self.frame_id = frame_id + self.start_frame = frame_id + self.score_list.append(self.score) + + def re_activate(self, new_track, frame_id, new_id=False): + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) + ) + + self.update_features(new_track.curr_feat) + self.tracklet_len = 0 + self.state = TrackState.Tracked + self.is_activated = True + self.frame_id = frame_id + if new_id: + self.track_id = self.next_id() + self.score = new_track.score + self.score_list.append(self.score) + + def update(self, new_track, frame_id, update_feature=True): + """ + Update a matched track + :type new_track: STrack + :type frame_id: int + :type update_feature: bool + :return: + """ + self.frame_id = frame_id + self.tracklet_len += 1 + + new_tlwh = new_track.tlwh + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) + self.state = TrackState.Tracked + self.is_activated = True + + self.score = new_track.score + self.score_list.append(self.score) + if update_feature: + self.update_features(new_track.curr_feat) + + @property + # @jit(nopython=True) + def tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + """ + if self.mean is None: + return self._tlwh.copy() + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + @property + # @jit(nopython=True) + def tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_xyah(tlwh): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. 
+ """ + ret = np.asarray(tlwh).copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + def to_xyah(self): + return self.tlwh_to_xyah(self.tlwh) + + @staticmethod + # @jit(nopython=True) + def tlbr_to_tlwh(tlbr): + ret = np.asarray(tlbr).copy() + ret[2:] -= ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_tlbr(tlwh): + ret = np.asarray(tlwh).copy() + ret[2:] += ret[:2] + return ret + + def __repr__(self): + return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) + + +class JDETracker(object): + def __init__(self, opt, frame_rate=30): + self.opt = opt + if opt.gpus[0] >= 0: + opt.device = torch.device('cuda') + else: + opt.device = torch.device('cpu') + print('Creating model...') + self.model = create_model(opt.arch, opt.heads, opt.head_conv) + self.model = load_model(self.model, opt.load_model) + self.model = self.model.to(opt.device) + self.model.eval() + + self.tracked_stracks = [] # type: list[STrack] + self.lost_stracks = [] # type: list[STrack] + self.removed_stracks = [] # type: list[STrack] + + self.frame_id = 0 + #self.det_thresh = opt.conf_thres + self.det_thresh = opt.conf_thres + 0.1 + self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer) + self.max_time_lost = self.buffer_size + self.max_per_image = opt.K + self.mean = np.array(opt.mean, dtype=np.float32).reshape(1, 1, 3) + self.std = np.array(opt.std, dtype=np.float32).reshape(1, 1, 3) + + self.kalman_filter = KalmanFilter() + + def post_process(self, dets, meta): + dets = dets.detach().cpu().numpy() + dets = dets.reshape(1, -1, dets.shape[2]) + dets = ctdet_post_process( + dets.copy(), [meta['c']], [meta['s']], + meta['out_height'], meta['out_width'], self.opt.num_classes) + for j in range(1, self.opt.num_classes + 1): + dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 5) + return dets[0] + + def merge_outputs(self, detections): + results = {} + for j in range(1, self.opt.num_classes + 1): + results[j] = np.concatenate( + [detection[j] for detection in detections], axis=0).astype(np.float32) + + scores = np.hstack( + [results[j][:, 4] for j in range(1, self.opt.num_classes + 1)]) + if len(scores) > self.max_per_image: + kth = len(scores) - self.max_per_image + thresh = np.partition(scores, kth)[kth] + for j in range(1, self.opt.num_classes + 1): + keep_inds = (results[j][:, 4] >= thresh) + results[j] = results[j][keep_inds] + return results + + def update(self, im_blob, img0): + self.frame_id += 1 + activated_starcks = [] + refind_stracks = [] + lost_stracks = [] + removed_stracks = [] + + width = img0.shape[1] + height = img0.shape[0] + inp_height = im_blob.shape[2] + inp_width = im_blob.shape[3] + c = np.array([width / 2., height / 2.], dtype=np.float32) + s = max(float(inp_width) / float(inp_height) * height, width) * 1.0 + meta = {'c': c, 's': s, + 'out_height': inp_height // self.opt.down_ratio, + 'out_width': inp_width // self.opt.down_ratio} + + ''' Step 1: Network forward, get detections & embeddings''' + with torch.no_grad(): + output = self.model(im_blob)[-1] + hm = output['hm'].sigmoid_() + wh = output['wh'] + id_feature = output['id'] + id_feature = F.normalize(id_feature, dim=1) + + reg = output['reg'] if self.opt.reg_offset else None + dets, inds = mot_decode(hm, wh, reg=reg, ltrb=self.opt.ltrb, K=self.opt.K) + id_feature = _tranpose_and_gather_feat(id_feature, inds) + id_feature = id_feature.squeeze(0) + id_feature = id_feature.cpu().numpy() + + dets = self.post_process(dets, meta) + dets = self.merge_outputs([dets])[1] + + 
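+        # split detections by confidence: high-score boxes feed the embedding and IoU associations, low-score boxes are held back for the extra BYTE-style IoU association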
remain_inds = dets[:, 4] > self.opt.conf_thres + inds_low = dets[:, 4] > 0.2 + #inds_low = dets[:, 4] > self.opt.conf_thres + inds_high = dets[:, 4] < self.opt.conf_thres + inds_second = np.logical_and(inds_low, inds_high) + dets_second = dets[inds_second] + id_feature_second = id_feature[inds_second] + dets = dets[remain_inds] + id_feature = id_feature[remain_inds] + + # vis + ''' + for i in range(0, dets.shape[0]): + bbox = dets[i][0:4] + cv2.rectangle(img0, (bbox[0], bbox[1]), + (bbox[2], bbox[3]), + (0, 255, 0), 2) + cv2.imshow('dets', img0) + cv2.waitKey(0) + id0 = id0-1 + ''' + + if len(dets) > 0: + '''Detections''' + detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for + (tlbrs, f) in zip(dets[:, :5], id_feature)] + else: + detections = [] + + ''' Add newly detected tracklets to tracked_stracks''' + unconfirmed = [] + tracked_stracks = [] # type: list[STrack] + for track in self.tracked_stracks: + if not track.is_activated: + unconfirmed.append(track) + else: + tracked_stracks.append(track) + + ''' Step 2: First association, with embedding''' + strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) + # Predict the current location with KF + STrack.multi_predict(strack_pool) + dists = matching.embedding_distance(strack_pool, detections) + #dists = matching.fuse_iou(dists, strack_pool, detections) + #dists = matching.iou_distance(strack_pool, detections) + dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.opt.match_thres) + + for itracked, idet in matches: + track = strack_pool[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(detections[idet], self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + ''' Step 3: Second association, with IOU''' + detections = [detections[i] for i in u_detection] + r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] + dists = matching.iou_distance(r_tracked_stracks, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5) + + for itracked, idet in matches: + track = r_tracked_stracks[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + # association the untrack to the low score detections + if len(dets_second) > 0: + '''Detections''' + detections_second = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for + (tlbrs, f) in zip(dets_second[:, :5], id_feature_second)] + else: + detections_second = [] + second_tracked_stracks = [r_tracked_stracks[i] for i in u_track if r_tracked_stracks[i].state == TrackState.Tracked] + dists = matching.iou_distance(second_tracked_stracks, detections_second) + matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.4) + for itracked, idet in matches: + track = second_tracked_stracks[itracked] + det = detections_second[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + for it in u_track: + #track = r_tracked_stracks[it] + track = second_tracked_stracks[it] + if not track.state == TrackState.Lost: + 
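+                    # tracks unmatched in both association passes are marked lost; they may be re-activated within max_time_lost frames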
track.mark_lost() + lost_stracks.append(track) + + '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' + detections = [detections[i] for i in u_detection] + dists = matching.iou_distance(unconfirmed, detections) + matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) + for itracked, idet in matches: + unconfirmed[itracked].update(detections[idet], self.frame_id) + activated_starcks.append(unconfirmed[itracked]) + for it in u_unconfirmed: + track = unconfirmed[it] + track.mark_removed() + removed_stracks.append(track) + + """ Step 4: Init new stracks""" + for inew in u_detection: + track = detections[inew] + if track.score < self.det_thresh: + continue + track.activate(self.kalman_filter, self.frame_id) + activated_starcks.append(track) + """ Step 5: Update state""" + for track in self.lost_stracks: + if self.frame_id - track.end_frame > self.max_time_lost: + track.mark_removed() + removed_stracks.append(track) + + # print('Ramained match {} s'.format(t4-t3)) + + self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] + self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) + self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) + self.lost_stracks.extend(lost_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) + self.removed_stracks.extend(removed_stracks) + self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) + #self.tracked_stracks = remove_fp_stracks(self.tracked_stracks) + # get scores of lost tracks + output_stracks = [track for track in self.tracked_stracks if track.is_activated] + + logger.debug('===========Frame {}=========='.format(self.frame_id)) + logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks])) + logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks])) + logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks])) + logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks])) + + return output_stracks + + +def joint_stracks(tlista, tlistb): + exists = {} + res = [] + for t in tlista: + exists[t.track_id] = 1 + res.append(t) + for t in tlistb: + tid = t.track_id + if not exists.get(tid, 0): + exists[tid] = 1 + res.append(t) + return res + + +def sub_stracks(tlista, tlistb): + stracks = {} + for t in tlista: + stracks[t.track_id] = t + for t in tlistb: + tid = t.track_id + if stracks.get(tid, 0): + del stracks[tid] + return list(stracks.values()) + + +def remove_duplicate_stracks(stracksa, stracksb): + pdist = matching.iou_distance(stracksa, stracksb) + pairs = np.where(pdist < 0.15) + dupa, dupb = list(), list() + for p, q in zip(*pairs): + timep = stracksa[p].frame_id - stracksa[p].start_frame + timeq = stracksb[q].frame_id - stracksb[q].start_frame + if timep > timeq: + dupb.append(q) + else: + dupa.append(p) + resa = [t for i, t in enumerate(stracksa) if not i in dupa] + resb = [t for i, t in enumerate(stracksb) if not i in dupb] + return resa, resb + + +def remove_fp_stracks(stracksa, n_frame=10): + remain = [] + for t in stracksa: + score_5 = t.score_list[-n_frame:] + score_5 = np.array(score_5, dtype=np.float32) + index = score_5 < 0.45 + num = np.sum(index) + if num < n_frame: + remain.append(t) + return remain diff --git a/tutorials/jde/README.md b/tutorials/jde/README.md new file 
mode 100644 index 0000000000000000000000000000000000000000..cd915a2225a09b013b2c3ab55b5b2d7e19c66ec0 --- /dev/null +++ b/tutorials/jde/README.md @@ -0,0 +1,19 @@ +# JDE + +Step1. git clone https://github.com/Zhongdao/Towards-Realtime-MOT.git + + +Step2. replace https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/multitracker.py + +Step3. download JDE model trained on MIX and MOT17_half (mix_mot17_half_jde.pt): [google](https://drive.google.com/file/d/1jUiIbaHFf75Jq6thOGI3CPygMMBy6850/view?usp=sharing), [baidu(code:ccdd)](https://pan.baidu.com/s/10se81ZktkUDUWn2dZzkk_Q) + +Step4. put track_half.py under https://github.com/Zhongdao/Towards-Realtime-MOT and run: +``` +python3 track_half.py --cfg ./cfg/yolov3_1088x608.cfg --weights weights/mix_mot17_half_jde.pt +``` + + +## Notes +byte_tracker: only motion + +tracker: motion + reid diff --git a/tutorials/jde/byte_tracker.py b/tutorials/jde/byte_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..63baccebb1e7bd710d863984426bb94c2770d95d --- /dev/null +++ b/tutorials/jde/byte_tracker.py @@ -0,0 +1,369 @@ +from collections import deque +import torch +import numpy as np +from utils.kalman_filter import KalmanFilter +from utils.log import logger +from models import * +from tracker import matching +from .basetrack import BaseTrack, TrackState + + +class STrack(BaseTrack): + + def __init__(self, tlwh, score): + + # wait activate + self._tlwh = np.asarray(tlwh, dtype=np.float) + self.kalman_filter = None + self.mean, self.covariance = None, None + self.is_activated = False + + self.score = score + self.tracklet_len = 0 + + def predict(self): + mean_state = self.mean.copy() + if self.state != TrackState.Tracked: + mean_state[7] = 0 + self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) + + @staticmethod + def multi_predict(stracks, kalman_filter): + if len(stracks) > 0: + multi_mean = np.asarray([st.mean.copy() for st in stracks]) + multi_covariance = np.asarray([st.covariance for st in stracks]) + for i, st in enumerate(stracks): + if st.state != TrackState.Tracked: + multi_mean[i][7] = 0 +# multi_mean, multi_covariance = STrack.kalman_filter.multi_predict(multi_mean, multi_covariance) + multi_mean, multi_covariance = kalman_filter.multi_predict(multi_mean, multi_covariance) + for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): + stracks[i].mean = mean + stracks[i].covariance = cov + + def activate(self, kalman_filter, frame_id): + """Start a new tracklet""" + self.kalman_filter = kalman_filter + self.track_id = self.next_id() + self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) + + self.tracklet_len = 0 + self.state = TrackState.Tracked + #self.is_activated = True + self.frame_id = frame_id + self.start_frame = frame_id + + def re_activate(self, new_track, frame_id, new_id=False): + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) + ) + + self.tracklet_len = 0 + self.state = TrackState.Tracked + self.is_activated = True + self.frame_id = frame_id + if new_id: + self.track_id = self.next_id() + + def update(self, new_track, frame_id, update_feature=True): + """ + Update a matched track + :type new_track: STrack + :type frame_id: int + :type update_feature: bool + :return: + """ + self.frame_id = frame_id + self.tracklet_len += 1 + + new_tlwh = new_track.tlwh + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, 
self.tlwh_to_xyah(new_tlwh)) + self.state = TrackState.Tracked + self.is_activated = True + + self.score = new_track.score + + @property + def tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + """ + if self.mean is None: + return self._tlwh.copy() + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + @property + def tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + @staticmethod + def tlwh_to_xyah(tlwh): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. + """ + ret = np.asarray(tlwh).copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + def to_xyah(self): + return self.tlwh_to_xyah(self.tlwh) + + @staticmethod + def tlbr_to_tlwh(tlbr): + ret = np.asarray(tlbr).copy() + ret[2:] -= ret[:2] + return ret + + @staticmethod + def tlwh_to_tlbr(tlwh): + ret = np.asarray(tlwh).copy() + ret[2:] += ret[:2] + return ret + + def __repr__(self): + return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) + + +class BYTETracker(object): + def __init__(self, opt, frame_rate=30): + self.opt = opt + self.model = Darknet(opt.cfg, nID=14455) + # load_darknet_weights(self.model, opt.weights) + self.model.load_state_dict(torch.load(opt.weights, map_location='cpu')['model'], strict=False) + self.model.cuda().eval() + + self.tracked_stracks = [] # type: list[STrack] + self.lost_stracks = [] # type: list[STrack] + self.removed_stracks = [] # type: list[STrack] + + self.frame_id = 0 + self.det_thresh = opt.conf_thres + self.init_thresh = self.det_thresh + 0.2 + self.low_thresh = 0.3 + self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer) + self.max_time_lost = self.buffer_size + + self.kalman_filter = KalmanFilter() + + def update(self, im_blob, img0): + """ + Processes the image frame and finds bounding box(detections). + + Associates the detection with corresponding tracklets and also handles lost, removed, refound and active tracklets + + Parameters + ---------- + im_blob : torch.float32 + Tensor of shape depending upon the size of image. By default, shape of this tensor is [1, 3, 608, 1088] + + img0 : ndarray + ndarray of shape depending on the input image sequence. By default, shape is [608, 1080, 3] + + Returns + ------- + output_stracks : list of Strack(instances) + The list contains information regarding the online_tracklets for the recieved image tensor. + + """ + + self.frame_id += 1 + activated_starcks = [] # for storing active tracks, for the current frame + refind_stracks = [] # Lost Tracks whose detections are obtained in the current frame + lost_stracks = [] # The tracks which are not obtained in the current frame but are not removed.(Lost for some time lesser than the threshold for removing) + removed_stracks = [] + + t1 = time.time() + ''' Step 1: Network forward, get detections & embeddings''' + with torch.no_grad(): + pred = self.model(im_blob) + # pred is tensor of all the proposals (default number of proposals: 54264). Proposals have information associated with the bounding box and embeddings + pred = pred[pred[:, :, 4] > self.low_thresh] + # pred now has lesser number of proposals. 
Proposals rejected on basis of object confidence score + if len(pred) > 0: + dets = non_max_suppression(pred.unsqueeze(0), self.low_thresh, self.opt.nms_thres)[0].cpu() + # Final proposals are obtained in dets. Information of bounding box and embeddings also included + # Next step changes the detection scales + scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round() + '''Detections is list of (x1, y1, x2, y2, object_conf, class_score, class_pred)''' + # class_pred is the embeddings. + + dets = dets.numpy() + remain_inds = dets[:, 4] > self.det_thresh + inds_low = dets[:, 4] > self.low_thresh + inds_high = dets[:, 4] < self.det_thresh + inds_second = np.logical_and(inds_low, inds_high) + dets_second = dets[inds_second] + dets = dets[remain_inds] + + detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4]) for + tlbrs in dets[:, :5]] + else: + detections = [] + dets_second = [] + + t2 = time.time() + # print('Forward: {} s'.format(t2-t1)) + + ''' Add newly detected tracklets to tracked_stracks''' + unconfirmed = [] + tracked_stracks = [] # type: list[STrack] + for track in self.tracked_stracks: + if not track.is_activated: + # previous tracks which are not active in the current frame are added in unconfirmed list + unconfirmed.append(track) + # print("Should not be here, in unconfirmed") + else: + # Active tracks are added to the local list 'tracked_stracks' + tracked_stracks.append(track) + + ''' Step 2: First association, with embedding''' + # Combining currently tracked_stracks and lost_stracks + strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) + # Predict the current location with KF + STrack.multi_predict(strack_pool, self.kalman_filter) + dists = matching.iou_distance(strack_pool, detections) + # The dists is the list of distances of the detection with the tracks in strack_pool + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.8) + # The matches is the array for corresponding matches of the detection with the corresponding strack_pool + + for itracked, idet in matches: + # itracked is the id of the track and idet is the detection + track = strack_pool[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + # If the track is active, add the detection to the track + track.update(detections[idet], self.frame_id) + activated_starcks.append(track) + else: + # We have obtained a detection from a track which is not active, hence put the track in refind_stracks list + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + # association the untrack to the low score detections + if len(dets_second) > 0: + detections_second = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4]) for + tlbrs in dets_second[:, :5]] + else: + detections_second = [] + r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] + dists = matching.iou_distance(r_tracked_stracks, detections_second) + matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.4) + for itracked, idet in matches: + track = r_tracked_stracks[itracked] + det = detections_second[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + for it in u_track: + track = r_tracked_stracks[it] + if not track.state == TrackState.Lost: + track.mark_lost() + lost_stracks.append(track) + # If no detections are obtained for tracks 
(u_track), the tracks are added to lost_tracks list and are marked lost + + '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' + detections = [detections[i] for i in u_detection] + dists = matching.iou_distance(unconfirmed, detections) + matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) + for itracked, idet in matches: + unconfirmed[itracked].update(detections[idet], self.frame_id) + activated_starcks.append(unconfirmed[itracked]) + + # The tracks which are yet not matched + for it in u_unconfirmed: + track = unconfirmed[it] + track.mark_removed() + removed_stracks.append(track) + + # after all these confirmation steps, if a new detection is found, it is initialized for a new track + """ Step 4: Init new stracks""" + for inew in u_detection: + track = detections[inew] + if track.score < self.init_thresh: + continue + track.activate(self.kalman_filter, self.frame_id) + activated_starcks.append(track) + + """ Step 5: Update state""" + # If the tracks are lost for more frames than the threshold number, the tracks are removed. + for track in self.lost_stracks: + if self.frame_id - track.end_frame > self.max_time_lost: + track.mark_removed() + removed_stracks.append(track) + # print('Remained match {} s'.format(t4-t3)) + + # Update the self.tracked_stracks and self.lost_stracks using the updates in this step. + self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] + self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) + self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) + # self.lost_stracks = [t for t in self.lost_stracks if t.state == TrackState.Lost] # type: list[STrack] + self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) + self.lost_stracks.extend(lost_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) + self.removed_stracks.extend(removed_stracks) + self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) + + # get scores of lost tracks + output_stracks = [track for track in self.tracked_stracks if track.is_activated] + + logger.debug('===========Frame {}=========='.format(self.frame_id)) + logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks])) + logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks])) + logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks])) + logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks])) + # print('Final {} s'.format(t5-t4)) + return output_stracks + +def joint_stracks(tlista, tlistb): + exists = {} + res = [] + for t in tlista: + exists[t.track_id] = 1 + res.append(t) + for t in tlistb: + tid = t.track_id + if not exists.get(tid, 0): + exists[tid] = 1 + res.append(t) + return res + +def sub_stracks(tlista, tlistb): + stracks = {} + for t in tlista: + stracks[t.track_id] = t + for t in tlistb: + tid = t.track_id + if stracks.get(tid, 0): + del stracks[tid] + return list(stracks.values()) + +def remove_duplicate_stracks(stracksa, stracksb): + pdist = matching.iou_distance(stracksa, stracksb) + pairs = np.where(pdist<0.15) + dupa, dupb = list(), list() + for p,q in zip(*pairs): + timep = stracksa[p].frame_id - stracksa[p].start_frame + timeq = stracksb[q].frame_id - stracksb[q].start_frame + if timep > timeq: + dupb.append(q) + else: + dupa.append(p) + resa = [t for i,t in enumerate(stracksa) if not i in dupa] + 
resb = [t for i,t in enumerate(stracksb) if not i in dupb] + return resa, resb + + diff --git a/tutorials/jde/track_half.py b/tutorials/jde/track_half.py new file mode 100644 index 0000000000000000000000000000000000000000..55d6c13d6eca83359c88e22f261b95533ddb05f5 --- /dev/null +++ b/tutorials/jde/track_half.py @@ -0,0 +1,222 @@ +import os +import os.path as osp +import cv2 +import logging +import argparse +import motmetrics as mm + +import torch +#from tracker.multitracker import JDETracker +from tracker.byte_tracker import BYTETracker +from utils import visualization as vis +from utils.log import logger +from utils.timer import Timer +from utils.evaluation import Evaluator +from utils.parse_config import parse_model_cfg +import utils.datasets as datasets +from utils.utils import * + + +def write_results(filename, results, data_type): + if data_type == 'mot': + save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' + elif data_type == 'kitti': + save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n' + else: + raise ValueError(data_type) + + with open(filename, 'w') as f: + for frame_id, tlwhs, track_ids in results: + if data_type == 'kitti': + frame_id -= 1 + for tlwh, track_id in zip(tlwhs, track_ids): + if track_id < 0: + continue + x1, y1, w, h = tlwh + x2, y2 = x1 + w, y1 + h + line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h) + f.write(line) + logger.info('save results to {}'.format(filename)) + + +def eval_seq(opt, dataloader, data_type, result_filename, save_dir=None, show_image=True, frame_rate=30): + ''' + Processes the video sequence given and provides the output of tracking result (write the results in video file) + + It uses JDE model for getting information about the online targets present. + + Parameters + ---------- + opt : Namespace + Contains information passed as commandline arguments. + + dataloader : LoadVideo + Instance of LoadVideo class used for fetching the image sequence and associated data. + + data_type : String + Type of dataset corresponding(similar) to the given video. + + result_filename : String + The name(path) of the file for storing results. + + save_dir : String + Path to the folder for storing the frames containing bounding box information (Result frames). + + show_image : bool + Option for shhowing individial frames during run-time. + + frame_rate : int + Frame-rate of the given video. 
+ + Returns + ------- + (Returns are not significant here) + frame_id : int + Sequence number of the last sequence + ''' + + if save_dir: + mkdir_if_missing(save_dir) + tracker = BYTETracker(opt, frame_rate=frame_rate) + timer = Timer() + results = [] + len_all = len(dataloader) + start_frame = int(len_all / 2) + frame_id = int(len_all / 2) + for i, (path, img, img0) in enumerate(dataloader): + if i < start_frame: + continue + if frame_id % 20 == 0: + logger.info('Processing frame {} ({:.2f} fps)'.format(frame_id, 1./max(1e-5, timer.average_time))) + + # run tracking + timer.tic() + blob = torch.from_numpy(img).cuda().unsqueeze(0) + online_targets = tracker.update(blob, img0) + online_tlwhs = [] + online_ids = [] + for t in online_targets: + tlwh = t.tlwh + tid = t.track_id + vertical = tlwh[2] / tlwh[3] > 1.6 + if tlwh[2] * tlwh[3] > opt.min_box_area and not vertical: + online_tlwhs.append(tlwh) + online_ids.append(tid) + timer.toc() + # save results + results.append((frame_id + 1, online_tlwhs, online_ids)) + if show_image or save_dir is not None: + online_im = vis.plot_tracking(img0, online_tlwhs, online_ids, frame_id=frame_id, + fps=1. / timer.average_time) + if show_image: + cv2.imshow('online_im', online_im) + if save_dir is not None: + cv2.imwrite(os.path.join(save_dir, '{:05d}.jpg'.format(frame_id)), online_im) + frame_id += 1 + # save results + write_results(result_filename, results, data_type) + return frame_id, timer.average_time, timer.calls + + +def main(opt, data_root='/data/MOT16/train', det_root=None, seqs=('MOT16-05',), exp_name='demo', + save_images=False, save_videos=False, show_image=True): + logger.setLevel(logging.INFO) + result_root = os.path.join(data_root, '..', 'results', exp_name) + mkdir_if_missing(result_root) + data_type = 'mot' + + # Read config + cfg_dict = parse_model_cfg(opt.cfg) + opt.img_size = [int(cfg_dict[0]['width']), int(cfg_dict[0]['height'])] + + # run tracking + accs = [] + n_frame = 0 + timer_avgs, timer_calls = [], [] + for seq in seqs: + output_dir = os.path.join(data_root, '..','outputs', exp_name, seq) if save_images or save_videos else None + + logger.info('start seq: {}'.format(seq)) + dataloader = datasets.LoadImages(osp.join(data_root, seq, 'img1'), opt.img_size) + result_filename = os.path.join(result_root, '{}.txt'.format(seq)) + meta_info = open(os.path.join(data_root, seq, 'seqinfo.ini')).read() + frame_rate = int(meta_info[meta_info.find('frameRate')+10:meta_info.find('\nseqLength')]) + nf, ta, tc = eval_seq(opt, dataloader, data_type, result_filename, + save_dir=output_dir, show_image=show_image, frame_rate=frame_rate) + n_frame += nf + timer_avgs.append(ta) + timer_calls.append(tc) + + # eval + logger.info('Evaluate seq: {}'.format(seq)) + evaluator = Evaluator(data_root, seq, data_type) + accs.append(evaluator.eval_file(result_filename)) + if save_videos: + output_video_path = osp.join(output_dir, '{}.mp4'.format(seq)) + cmd_str = 'ffmpeg -f image2 -i {}/%05d.jpg -c:v copy {}'.format(output_dir, output_video_path) + os.system(cmd_str) + timer_avgs = np.asarray(timer_avgs) + timer_calls = np.asarray(timer_calls) + all_time = np.dot(timer_avgs, timer_calls) + avg_time = all_time / np.sum(timer_calls) + logger.info('Time elapsed: {:.2f} seconds, FPS: {:.2f}'.format(all_time, 1.0 / avg_time)) + + # get summary + metrics = mm.metrics.motchallenge_metrics + mh = mm.metrics.create() + summary = Evaluator.get_summary(accs, seqs, metrics) + strsummary = mm.io.render_summary( + summary, + formatters=mh.formatters, + 
namemap=mm.io.motchallenge_metric_names + ) + print(strsummary) + Evaluator.save_summary(summary, os.path.join(result_root, 'summary_{}.xlsx'.format(exp_name))) + + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(prog='track.py') + parser.add_argument('--cfg', type=str, default='cfg/yolov3_1088x608.cfg', help='cfg file path') + parser.add_argument('--weights', type=str, default='weights/latest.pt', help='path to weights file') + parser.add_argument('--iou-thres', type=float, default=0.5, help='iou threshold required to qualify as detected') + parser.add_argument('--conf-thres', type=float, default=0.7, help='object confidence threshold') + parser.add_argument('--nms-thres', type=float, default=0.4, help='iou threshold for non-maximum suppression') + parser.add_argument('--min-box-area', type=float, default=200, help='filter out tiny boxes') + parser.add_argument('--track-buffer', type=int, default=30, help='tracking buffer') + parser.add_argument('--test-mot16', action='store_true', help='tracking buffer') + parser.add_argument('--val-mot17', default=True, help='validation on MOT17') + parser.add_argument('--save-images', action='store_true', help='save tracking results (image)') + parser.add_argument('--save-videos', action='store_true', help='save tracking results (video)') + opt = parser.parse_args() + print(opt, end='\n\n') + + if not opt.test_mot16: + seqs_str = '''MOT17-02-SDP + MOT17-04-SDP + MOT17-05-SDP + MOT17-09-SDP + MOT17-10-SDP + MOT17-11-SDP + MOT17-13-SDP + ''' + #seqs_str = '''MOT17-02-SDP''' + data_root = '/opt/tiger/demo/datasets/MOT17/images/train' + else: + seqs_str = '''MOT16-01 + MOT16-03 + MOT16-06 + MOT16-07 + MOT16-08 + MOT16-12 + MOT16-14''' + data_root = '/home/wangzd/datasets/MOT/MOT16/images/test' + seqs = [seq.strip() for seq in seqs_str.split()] + + main(opt, + data_root=data_root, + seqs=seqs, + exp_name=opt.weights.split('/')[-2], + show_image=False, + save_images=opt.save_images, + save_videos=opt.save_videos) diff --git a/tutorials/jde/tracker.py b/tutorials/jde/tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..81b9653f94571a36e813b1ec938c42f9f0c01f67 --- /dev/null +++ b/tutorials/jde/tracker.py @@ -0,0 +1,414 @@ + +from collections import deque +import torch +import numpy as np +from utils.kalman_filter import KalmanFilter +from utils.log import logger +from models import * +from tracker import matching +from .basetrack import BaseTrack, TrackState + + +class STrack(BaseTrack): + + def __init__(self, tlwh, score, temp_feat, buffer_size=30): + + # wait activate + self._tlwh = np.asarray(tlwh, dtype=np.float) + self.kalman_filter = None + self.mean, self.covariance = None, None + self.is_activated = False + + self.score = score + self.tracklet_len = 0 + + self.smooth_feat = None + self.update_features(temp_feat) + self.features = deque([], maxlen=buffer_size) + self.alpha = 0.9 + + def update_features(self, feat): + feat /= np.linalg.norm(feat) + self.curr_feat = feat + if self.smooth_feat is None: + self.smooth_feat = feat + else: + self.smooth_feat = self.alpha *self.smooth_feat + (1-self.alpha) * feat + self.features.append(feat) + self.smooth_feat /= np.linalg.norm(self.smooth_feat) + + def predict(self): + mean_state = self.mean.copy() + if self.state != TrackState.Tracked: + mean_state[7] = 0 + self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) + + @staticmethod + def multi_predict(stracks, kalman_filter): + if len(stracks) > 0: + multi_mean = np.asarray([st.mean.copy() 
for st in stracks]) + multi_covariance = np.asarray([st.covariance for st in stracks]) + for i, st in enumerate(stracks): + if st.state != TrackState.Tracked: + multi_mean[i][7] = 0 +# multi_mean, multi_covariance = STrack.kalman_filter.multi_predict(multi_mean, multi_covariance) + multi_mean, multi_covariance = kalman_filter.multi_predict(multi_mean, multi_covariance) + for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): + stracks[i].mean = mean + stracks[i].covariance = cov + + def activate(self, kalman_filter, frame_id): + """Start a new tracklet""" + self.kalman_filter = kalman_filter + self.track_id = self.next_id() + self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) + + self.tracklet_len = 0 + self.state = TrackState.Tracked + #self.is_activated = True + self.frame_id = frame_id + self.start_frame = frame_id + + def re_activate(self, new_track, frame_id, new_id=False): + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) + ) + + self.update_features(new_track.curr_feat) + self.tracklet_len = 0 + self.state = TrackState.Tracked + self.is_activated = True + self.frame_id = frame_id + if new_id: + self.track_id = self.next_id() + + def update(self, new_track, frame_id, update_feature=True): + """ + Update a matched track + :type new_track: STrack + :type frame_id: int + :type update_feature: bool + :return: + """ + self.frame_id = frame_id + self.tracklet_len += 1 + + new_tlwh = new_track.tlwh + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) + self.state = TrackState.Tracked + self.is_activated = True + + self.score = new_track.score + if update_feature: + self.update_features(new_track.curr_feat) + + @property + def tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + """ + if self.mean is None: + return self._tlwh.copy() + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + @property + def tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + @staticmethod + def tlwh_to_xyah(tlwh): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. 
+ """ + ret = np.asarray(tlwh).copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + def to_xyah(self): + return self.tlwh_to_xyah(self.tlwh) + + @staticmethod + def tlbr_to_tlwh(tlbr): + ret = np.asarray(tlbr).copy() + ret[2:] -= ret[:2] + return ret + + @staticmethod + def tlwh_to_tlbr(tlwh): + ret = np.asarray(tlwh).copy() + ret[2:] += ret[:2] + return ret + + def __repr__(self): + return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) + + +class JDETracker(object): + def __init__(self, opt, frame_rate=30): + self.opt = opt + self.model = Darknet(opt.cfg, nID=14455) + # load_darknet_weights(self.model, opt.weights) + self.model.load_state_dict(torch.load(opt.weights, map_location='cpu')['model'], strict=False) + self.model.cuda().eval() + + self.tracked_stracks = [] # type: list[STrack] + self.lost_stracks = [] # type: list[STrack] + self.removed_stracks = [] # type: list[STrack] + + self.frame_id = 0 + self.det_thresh = opt.conf_thres + self.init_thresh = self.det_thresh + 0.2 + self.low_thresh = 0.4 + self.buffer_size = int(frame_rate / 30.0 * opt.track_buffer) + self.max_time_lost = self.buffer_size + + self.kalman_filter = KalmanFilter() + + def update(self, im_blob, img0): + """ + Processes the image frame and finds bounding box(detections). + + Associates the detection with corresponding tracklets and also handles lost, removed, refound and active tracklets + + Parameters + ---------- + im_blob : torch.float32 + Tensor of shape depending upon the size of image. By default, shape of this tensor is [1, 3, 608, 1088] + + img0 : ndarray + ndarray of shape depending on the input image sequence. By default, shape is [608, 1080, 3] + + Returns + ------- + output_stracks : list of Strack(instances) + The list contains information regarding the online_tracklets for the recieved image tensor. + + """ + + self.frame_id += 1 + activated_starcks = [] # for storing active tracks, for the current frame + refind_stracks = [] # Lost Tracks whose detections are obtained in the current frame + lost_stracks = [] # The tracks which are not obtained in the current frame but are not removed.(Lost for some time lesser than the threshold for removing) + removed_stracks = [] + + t1 = time.time() + ''' Step 1: Network forward, get detections & embeddings''' + with torch.no_grad(): + pred = self.model(im_blob) + # pred is tensor of all the proposals (default number of proposals: 54264). Proposals have information associated with the bounding box and embeddings + pred = pred[pred[:, :, 4] > self.low_thresh] + # pred now has lesser number of proposals. Proposals rejected on basis of object confidence score + if len(pred) > 0: + dets = non_max_suppression(pred.unsqueeze(0), self.low_thresh, self.opt.nms_thres)[0].cpu() + # Final proposals are obtained in dets. Information of bounding box and embeddings also included + # Next step changes the detection scales + scale_coords(self.opt.img_size, dets[:, :4], img0.shape).round() + '''Detections is list of (x1, y1, x2, y2, object_conf, class_score, class_pred)''' + # class_pred is the embeddings. 
+ + dets = dets.numpy() + remain_inds = dets[:, 4] > self.det_thresh + inds_low = dets[:, 4] > self.low_thresh + inds_high = dets[:, 4] < self.det_thresh + inds_second = np.logical_and(inds_low, inds_high) + dets_second = dets[inds_second] + dets = dets[remain_inds] + + detections = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for + (tlbrs, f) in zip(dets[:, :5], dets[:, 6:])] + else: + detections = [] + dets_second = [] + + t2 = time.time() + # print('Forward: {} s'.format(t2-t1)) + + ''' Add newly detected tracklets to tracked_stracks''' + unconfirmed = [] + tracked_stracks = [] # type: list[STrack] + for track in self.tracked_stracks: + if not track.is_activated: + # previous tracks which are not active in the current frame are added in unconfirmed list + unconfirmed.append(track) + # print("Should not be here, in unconfirmed") + else: + # Active tracks are added to the local list 'tracked_stracks' + tracked_stracks.append(track) + + ''' Step 2: First association, with embedding''' + # Combining currently tracked_stracks and lost_stracks + strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) + # Predict the current location with KF + STrack.multi_predict(strack_pool, self.kalman_filter) + + dists = matching.embedding_distance(strack_pool, detections) + dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections) + #dists = matching.iou_distance(strack_pool, detections) + # The dists is the list of distances of the detection with the tracks in strack_pool + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.7) + # The matches is the array for corresponding matches of the detection with the corresponding strack_pool + + for itracked, idet in matches: + # itracked is the id of the track and idet is the detection + track = strack_pool[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + # If the track is active, add the detection to the track + track.update(detections[idet], self.frame_id) + activated_starcks.append(track) + else: + # We have obtained a detection from a track which is not active, hence put the track in refind_stracks list + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + # None of the steps below happen if there are no undetected tracks. 
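The first association above fuses appearance and motion: `matching.embedding_distance` builds a cosine-distance cost between track features and detection features, `matching.fuse_motion` gates it with the Kalman (Mahalanobis) distance, and `matching.linear_assignment` solves the assignment under a threshold of 0.7. A rough sketch of the appearance cost and the thresholded assignment, using `scipy.optimize.linear_sum_assignment` in place of the project's LAP-based solver and omitting the motion fusion:

```
# Sketch of a cosine-distance cost matrix plus thresholded assignment
# (scipy Hungarian solver stands in for matching.linear_assignment).
import numpy as np
from scipy.optimize import linear_sum_assignment

def embedding_distance(track_feats: np.ndarray, det_feats: np.ndarray) -> np.ndarray:
    """Cosine distance, rows = tracks, cols = detections."""
    a = track_feats / np.linalg.norm(track_feats, axis=1, keepdims=True)
    b = det_feats / np.linalg.norm(det_feats, axis=1, keepdims=True)
    return np.maximum(0.0, 1.0 - a @ b.T)

def assign(cost: np.ndarray, thresh: float):
    rows, cols = linear_sum_assignment(cost)
    matches = [(r, c) for r, c in zip(rows, cols) if cost[r, c] <= thresh]
    matched_r = {m[0] for m in matches}
    matched_c = {m[1] for m in matches}
    u_track = [r for r in range(cost.shape[0]) if r not in matched_r]
    u_det = [c for c in range(cost.shape[1]) if c not in matched_c]
    return matches, u_track, u_det

cost = embedding_distance(np.random.rand(3, 512), np.random.rand(4, 512))
print(assign(cost, thresh=0.7))
```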
+ ''' Step 3: Second association, with IOU''' + detections = [detections[i] for i in u_detection] + # detections is now a list of the unmatched detections + r_tracked_stracks = [] # This is container for stracks which were tracked till the + # previous frame but no detection was found for it in the current frame + for i in u_track: + if strack_pool[i].state == TrackState.Tracked: + r_tracked_stracks.append(strack_pool[i]) + dists = matching.iou_distance(r_tracked_stracks, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5) + # matches is the list of detections which matched with corresponding tracks by IOU distance method + for itracked, idet in matches: + track = r_tracked_stracks[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + # Same process done for some unmatched detections, but now considering IOU_distance as measure + + # association the untrack to the low score detections + if len(dets_second) > 0: + detections_second = [STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for + (tlbrs, f) in zip(dets_second[:, :5], dets_second[:, 6:])] + else: + detections_second = [] + second_tracked_stracks = [r_tracked_stracks[i] for i in u_track if r_tracked_stracks[i].state == TrackState.Tracked] + dists = matching.iou_distance(second_tracked_stracks, detections_second) + matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.4) + for itracked, idet in matches: + track = second_tracked_stracks[itracked] + det = detections_second[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + for it in u_track: + track = second_tracked_stracks[it] + if not track.state == TrackState.Lost: + track.mark_lost() + lost_stracks.append(track) + # If no detections are obtained for tracks (u_track), the tracks are added to lost_tracks list and are marked lost + + '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' + detections = [detections[i] for i in u_detection] + dists = matching.iou_distance(unconfirmed, detections) + matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) + for itracked, idet in matches: + unconfirmed[itracked].update(detections[idet], self.frame_id) + activated_starcks.append(unconfirmed[itracked]) + + # The tracks which are yet not matched + for it in u_unconfirmed: + track = unconfirmed[it] + track.mark_removed() + removed_stracks.append(track) + + # after all these confirmation steps, if a new detection is found, it is initialized for a new track + """ Step 4: Init new stracks""" + for inew in u_detection: + track = detections[inew] + if track.score < self.init_thresh: + continue + track.activate(self.kalman_filter, self.frame_id) + activated_starcks.append(track) + + """ Step 5: Update state""" + # If the tracks are lost for more frames than the threshold number, the tracks are removed. + for track in self.lost_stracks: + if self.frame_id - track.end_frame > self.max_time_lost: + track.mark_removed() + removed_stracks.append(track) + # print('Remained match {} s'.format(t4-t3)) + + # Update the self.tracked_stracks and self.lost_stracks using the updates in this step. 
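The housekeeping below relies on the `joint_stracks` / `sub_stracks` / `remove_duplicate_stracks` helpers defined at the bottom of this file: union by `track_id`, subtraction by `track_id`, and IoU-based de-duplication that keeps the longer-lived track. A toy illustration of the first two, using a stand-in `DummyTrack` class rather than the real `STrack`:

```
# Toy illustration of the track-list bookkeeping semantics (not the real STrack).
class DummyTrack:
    def __init__(self, track_id):
        self.track_id = track_id
    def __repr__(self):
        return "T{}".format(self.track_id)

def joint_stracks(tlista, tlistb):
    """Union of two track lists, keeping the first occurrence per track_id."""
    seen, res = set(), []
    for t in tlista + tlistb:
        if t.track_id not in seen:
            seen.add(t.track_id)
            res.append(t)
    return res

def sub_stracks(tlista, tlistb):
    """Tracks from the first list whose track_id is absent from the second."""
    drop = {t.track_id for t in tlistb}
    return [t for t in tlista if t.track_id not in drop]

a, b = [DummyTrack(1), DummyTrack(2)], [DummyTrack(2), DummyTrack(3)]
print(joint_stracks(a, b))  # [T1, T2, T3]
print(sub_stracks(a, b))    # [T1]
```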
+ self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] + self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) + self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) + # self.lost_stracks = [t for t in self.lost_stracks if t.state == TrackState.Lost] # type: list[STrack] + self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) + self.lost_stracks.extend(lost_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) + self.removed_stracks.extend(removed_stracks) + self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) + + # get scores of lost tracks + output_stracks = [track for track in self.tracked_stracks if track.is_activated] + + logger.debug('===========Frame {}=========='.format(self.frame_id)) + logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks])) + logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks])) + logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks])) + logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks])) + # print('Final {} s'.format(t5-t4)) + return output_stracks + +def joint_stracks(tlista, tlistb): + exists = {} + res = [] + for t in tlista: + exists[t.track_id] = 1 + res.append(t) + for t in tlistb: + tid = t.track_id + if not exists.get(tid, 0): + exists[tid] = 1 + res.append(t) + return res + +def sub_stracks(tlista, tlistb): + stracks = {} + for t in tlista: + stracks[t.track_id] = t + for t in tlistb: + tid = t.track_id + if stracks.get(tid, 0): + del stracks[tid] + return list(stracks.values()) + +def remove_duplicate_stracks(stracksa, stracksb): + pdist = matching.iou_distance(stracksa, stracksb) + pairs = np.where(pdist<0.15) + dupa, dupb = list(), list() + for p,q in zip(*pairs): + timep = stracksa[p].frame_id - stracksa[p].start_frame + timeq = stracksb[q].frame_id - stracksb[q].start_frame + if timep > timeq: + dupb.append(q) + else: + dupa.append(p) + resa = [t for i,t in enumerate(stracksa) if not i in dupa] + resb = [t for i,t in enumerate(stracksb) if not i in dupb] + return resa, resb + + diff --git a/tutorials/motr/README.md b/tutorials/motr/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3fcc6ca471912eba104c258cc8a152f14673d813 --- /dev/null +++ b/tutorials/motr/README.md @@ -0,0 +1,100 @@ +# MOTR + +Step1. + +git clone https://github.com/megvii-model/MOTR.git and install + +replace https://github.com/megvii-model/MOTR/blob/main/datasets/joint.py + +replace https://github.com/megvii-model/MOTR/blob/main/datasets/transforms.py + + +train + +``` +python3 -m torch.distributed.launch --nproc_per_node=8 \ + --use_env main.py \ + --meta_arch motr \ + --dataset_file e2e_joint \ + --epoch 50 \ + --with_box_refine \ + --lr_drop 40 \ + --lr 2e-4 \ + --lr_backbone 2e-5 \ + --pretrained coco_model_final.pth \ + --output_dir exps/e2e_motr_r50_mot17trainhalf \ + --batch_size 1 \ + --sample_mode 'random_interval' \ + --sample_interval 10 \ + --sampler_steps 10 20 30 \ + --sampler_lengths 2 3 4 5 \ + --update_query_pos \ + --merger_dropout 0 \ + --dropout 0 \ + --random_drop 0.1 \ + --fp_ratio 0.3 \ + --query_interaction_layer 'QIM' \ + --extra_track_attn \ + --mot_path . 
+ --data_txt_path_train ./datasets/data_path/mot17.half \ + --data_txt_path_val ./datasets/data_path/mot17.val \ +``` +mot17.half and mot17.val are from https://github.com/ifzhang/FairMOT/tree/master/src/data + +You can also download the MOTR model trained by us: [google](https://drive.google.com/file/d/1pzGi53VooppQqhKf3TSxLK99LERsVyTw/view?usp=sharing), [baidu(code:t87h)](https://pan.baidu.com/s/1OrcR3L9Bf2xXIo8RQl3zyA) + + +Step2. + +replace https://github.com/megvii-model/MOTR/blob/main/util/evaluation.py + +replace https://github.com/megvii-model/MOTR/blob/main/eval.py + +replace https://github.com/megvii-model/MOTR/blob/main/models/motr.py + +add byte_tracker.py to https://github.com/megvii-model/MOTR + +add mot_online to https://github.com/megvii-model/MOTR + + +Step3. + + +val + +``` +python3 eval.py \ + --meta_arch motr \ + --dataset_file e2e_joint \ + --epoch 200 \ + --with_box_refine \ + --lr_drop 100 \ + --lr 2e-4 \ + --lr_backbone 2e-5 \ + --pretrained exps/e2e_motr_r50_mot17val/motr_final.pth \ + --output_dir exps/e2e_motr_r50_mot17val \ + --batch_size 1 \ + --sample_mode 'random_interval' \ + --sample_interval 10 \ + --sampler_steps 50 90 120 \ + --sampler_lengths 2 3 4 5 \ + --update_query_pos \ + --merger_dropout 0 \ + --dropout 0 \ + --random_drop 0.1 \ + --fp_ratio 0.3 \ + --query_interaction_layer 'QIM' \ + --extra_track_attn \ + --mot_path ./MOT17/images/train + --data_txt_path_train ./datasets/data_path/mot17.half \ + --data_txt_path_val ./datasets/data_path/mot17.val \ + --resume model_final.pth \ +``` + + + +# MOTR det + +in Step2, replace https://github.com/megvii-model/MOTR/blob/main/models/motr.py by motr_det.py + +others are the same as MOTR diff --git a/tutorials/motr/byte_tracker.py b/tutorials/motr/byte_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..d5bc6dd479441e78e92bd07ce496314d8de13d38 --- /dev/null +++ b/tutorials/motr/byte_tracker.py @@ -0,0 +1,339 @@ +import numpy as np +from collections import deque +import os +import os.path as osp +import copy +import torch +import torch.nn.functional as F + +from mot_online.kalman_filter import KalmanFilter +from mot_online.basetrack import BaseTrack, TrackState +from mot_online import matching + + + +class STrack(BaseTrack): + shared_kalman = KalmanFilter() + def __init__(self, tlwh, score): + + # wait activate + self._tlwh = np.asarray(tlwh, dtype=np.float) + self.kalman_filter = None + self.mean, self.covariance = None, None + self.is_activated = False + + self.score = score + self.tracklet_len = 0 + + def predict(self): + mean_state = self.mean.copy() + if self.state != TrackState.Tracked: + mean_state[7] = 0 + self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) + + @staticmethod + def multi_predict(stracks): + if len(stracks) > 0: + multi_mean = np.asarray([st.mean.copy() for st in stracks]) + multi_covariance = np.asarray([st.covariance for st in stracks]) + for i, st in enumerate(stracks): + if st.state != TrackState.Tracked: + multi_mean[i][7] = 0 + multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance) + for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): + stracks[i].mean = mean + stracks[i].covariance = cov + + def activate(self, kalman_filter, frame_id): + """Start a new tracklet""" + self.kalman_filter = kalman_filter + self.track_id = self.next_id() + self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) + + self.tracklet_len = 0 + self.state = 
TrackState.Tracked + if frame_id == 1: + self.is_activated = True + # self.is_activated = True + self.frame_id = frame_id + self.start_frame = frame_id + + def re_activate(self, new_track, frame_id, new_id=False): + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) + ) + self.tracklet_len = 0 + self.state = TrackState.Tracked + self.is_activated = True + self.frame_id = frame_id + if new_id: + self.track_id = self.next_id() + self.score = new_track.score + + def update(self, new_track, frame_id): + """ + Update a matched track + :type new_track: STrack + :type frame_id: int + :type update_feature: bool + :return: + """ + self.frame_id = frame_id + self.tracklet_len += 1 + + new_tlwh = new_track.tlwh + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) + self.state = TrackState.Tracked + self.is_activated = True + + self.score = new_track.score + + @property + # @jit(nopython=True) + def tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + """ + if self.mean is None: + return self._tlwh.copy() + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + @property + # @jit(nopython=True) + def tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_xyah(tlwh): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. + """ + ret = np.asarray(tlwh).copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + def to_xyah(self): + return self.tlwh_to_xyah(self.tlwh) + + @staticmethod + # @jit(nopython=True) + def tlbr_to_tlwh(tlbr): + ret = np.asarray(tlbr).copy() + ret[2:] -= ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_tlbr(tlwh): + ret = np.asarray(tlwh).copy() + ret[2:] += ret[:2] + return ret + + def __repr__(self): + return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) + + +class BYTETracker(object): + def __init__(self, frame_rate=30): + self.tracked_stracks = [] # type: list[STrack] + self.lost_stracks = [] # type: list[STrack] + self.removed_stracks = [] # type: list[STrack] + + self.frame_id = 0 + + self.low_thresh = 0.2 + self.track_thresh = 0.8 + self.det_thresh = self.track_thresh + 0.1 + + + self.buffer_size = int(frame_rate / 30.0 * 30) + self.max_time_lost = self.buffer_size + self.kalman_filter = KalmanFilter() + + def update(self, output_results): + self.frame_id += 1 + activated_starcks = [] + refind_stracks = [] + lost_stracks = [] + removed_stracks = [] + + + scores = output_results[:, 4] + bboxes = output_results[:, :4] # x1y1x2y2 + + remain_inds = scores > self.track_thresh + dets = bboxes[remain_inds] + scores_keep = scores[remain_inds] + + + inds_low = scores > self.low_thresh + inds_high = scores < self.track_thresh + inds_second = np.logical_and(inds_low, inds_high) + dets_second = bboxes[inds_second] + scores_second = scores[inds_second] + + + if len(dets) > 0: + '''Detections''' + detections = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for + (tlbr, s) in zip(dets, scores_keep)] + else: + detections = [] + + ''' Add newly detected tracklets to tracked_stracks''' + unconfirmed = [] + tracked_stracks = [] # type: list[STrack] + for track in 
self.tracked_stracks: + if not track.is_activated: + unconfirmed.append(track) + else: + tracked_stracks.append(track) + + ''' Step 2: First association, with Kalman and IOU''' + strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) + # Predict the current location with KF + STrack.multi_predict(strack_pool) + dists = matching.iou_distance(strack_pool, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.8) + + for itracked, idet in matches: + track = strack_pool[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(detections[idet], self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + ''' Step 3: Second association, with IOU''' + # association the untrack to the low score detections + if len(dets_second) > 0: + '''Detections''' + detections_second = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for + (tlbr, s) in zip(dets_second, scores_second)] + else: + detections_second = [] + r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] + dists = matching.iou_distance(r_tracked_stracks, detections_second) + matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5) + for itracked, idet in matches: + track = r_tracked_stracks[itracked] + det = detections_second[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + for it in u_track: + #track = r_tracked_stracks[it] + track = r_tracked_stracks[it] + if not track.state == TrackState.Lost: + track.mark_lost() + lost_stracks.append(track) + + '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' + detections = [detections[i] for i in u_detection] + dists = matching.iou_distance(unconfirmed, detections) + matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) + for itracked, idet in matches: + unconfirmed[itracked].update(detections[idet], self.frame_id) + activated_starcks.append(unconfirmed[itracked]) + for it in u_unconfirmed: + track = unconfirmed[it] + track.mark_removed() + removed_stracks.append(track) + + """ Step 4: Init new stracks""" + for inew in u_detection: + track = detections[inew] + if track.score < self.det_thresh: + continue + track.activate(self.kalman_filter, self.frame_id) + activated_starcks.append(track) + """ Step 5: Update state""" + for track in self.lost_stracks: + if self.frame_id - track.end_frame > self.max_time_lost: + track.mark_removed() + removed_stracks.append(track) + + # print('Ramained match {} s'.format(t4-t3)) + + self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] + self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) + self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) + self.lost_stracks.extend(lost_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) + self.removed_stracks.extend(removed_stracks) + self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) + # get scores of lost tracks + output_stracks = [track for track in self.tracked_stracks if track.is_activated] + + return output_stracks + + +def joint_stracks(tlista, 
tlistb): + exists = {} + res = [] + for t in tlista: + exists[t.track_id] = 1 + res.append(t) + for t in tlistb: + tid = t.track_id + if not exists.get(tid, 0): + exists[tid] = 1 + res.append(t) + return res + + +def sub_stracks(tlista, tlistb): + stracks = {} + for t in tlista: + stracks[t.track_id] = t + for t in tlistb: + tid = t.track_id + if stracks.get(tid, 0): + del stracks[tid] + return list(stracks.values()) + + +def remove_duplicate_stracks(stracksa, stracksb): + pdist = matching.iou_distance(stracksa, stracksb) + pairs = np.where(pdist < 0.15) + dupa, dupb = list(), list() + for p, q in zip(*pairs): + timep = stracksa[p].frame_id - stracksa[p].start_frame + timeq = stracksb[q].frame_id - stracksb[q].start_frame + if timep > timeq: + dupb.append(q) + else: + dupa.append(p) + resa = [t for i, t in enumerate(stracksa) if not i in dupa] + resb = [t for i, t in enumerate(stracksb) if not i in dupb] + return resa, resb + + +def remove_fp_stracks(stracksa, n_frame=10): + remain = [] + for t in stracksa: + score_5 = t.score_list[-n_frame:] + score_5 = np.array(score_5, dtype=np.float32) + index = score_5 < 0.45 + num = np.sum(index) + if num < n_frame: + remain.append(t) + return remain diff --git a/tutorials/motr/eval.py b/tutorials/motr/eval.py new file mode 100644 index 0000000000000000000000000000000000000000..fbbb8e5600fb762fa586d898c4477ebb82eae374 --- /dev/null +++ b/tutorials/motr/eval.py @@ -0,0 +1,470 @@ +# ------------------------------------------------------------------------ +# Copyright (c) 2021 megvii-model. All Rights Reserved. +# ------------------------------------------------------------------------ +# Modified from Deformable DETR (https://github.com/fundamentalvision/Deformable-DETR) +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ + +""" + SORT: A Simple, Online and Realtime Tracker + Copyright (C) 2016-2020 Alex Bewley alex@bewley.ai + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with this program. If not, see . 
+""" +from __future__ import print_function + +import os +import numpy as np +import random +import argparse +import torchvision.transforms.functional as F +import torch +import cv2 +from tqdm import tqdm +from pathlib import Path +from PIL import Image, ImageDraw +from models import build_model +from util.tool import load_model +from main import get_args_parser +from torch.nn.functional import interpolate +from typing import List +from util.evaluation import Evaluator +import motmetrics as mm +import shutil + +from detectron2.structures import Instances + +from tracker import BYTETracker + +np.random.seed(2020) + +COLORS_10 = [(144, 238, 144), (178, 34, 34), (221, 160, 221), (0, 255, 0), (0, 128, 0), (210, 105, 30), (220, 20, 60), + (192, 192, 192), (255, 228, 196), (50, 205, 50), (139, 0, 139), (100, 149, 237), (138, 43, 226), + (238, 130, 238), + (255, 0, 255), (0, 100, 0), (127, 255, 0), (255, 0, 255), (0, 0, 205), (255, 140, 0), (255, 239, 213), + (199, 21, 133), (124, 252, 0), (147, 112, 219), (106, 90, 205), (176, 196, 222), (65, 105, 225), + (173, 255, 47), + (255, 20, 147), (219, 112, 147), (186, 85, 211), (199, 21, 133), (148, 0, 211), (255, 99, 71), + (144, 238, 144), + (255, 255, 0), (230, 230, 250), (0, 0, 255), (128, 128, 0), (189, 183, 107), (255, 255, 224), + (128, 128, 128), + (105, 105, 105), (64, 224, 208), (205, 133, 63), (0, 128, 128), (72, 209, 204), (139, 69, 19), + (255, 245, 238), + (250, 240, 230), (152, 251, 152), (0, 255, 255), (135, 206, 235), (0, 191, 255), (176, 224, 230), + (0, 250, 154), + (245, 255, 250), (240, 230, 140), (245, 222, 179), (0, 139, 139), (143, 188, 143), (255, 0, 0), + (240, 128, 128), + (102, 205, 170), (60, 179, 113), (46, 139, 87), (165, 42, 42), (178, 34, 34), (175, 238, 238), + (255, 248, 220), + (218, 165, 32), (255, 250, 240), (253, 245, 230), (244, 164, 96), (210, 105, 30)] + + +def plot_one_box(x, img, color=None, label=None, score=None, line_thickness=None): + # Plots one bounding box on image img + + tl = line_thickness or round( + 0.002 * max(img.shape[0:2])) + 1 # line thickness + color = color or [random.randint(0, 255) for _ in range(3)] + c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) + cv2.rectangle(img, c1, c2, color, thickness=tl) + # if label: + # tf = max(tl - 1, 1) # font thickness + # t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0] + # c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 + # cv2.rectangle(img, c1, c2, color, -1) # filled + # cv2.putText(img, + # label, (c1[0], c1[1] - 2), + # 0, + # tl / 3, [225, 255, 255], + # thickness=tf, + # lineType=cv2.LINE_AA) + # if score is not None: + # cv2.putText(img, score, (c1[0], c1[1] + 30), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA) + return img + + +def draw_bboxes(ori_img, bbox, identities=None, offset=(0, 0), cvt_color=False): + if cvt_color: + ori_img = cv2.cvtColor(np.asarray(ori_img), cv2.COLOR_RGB2BGR) + img = ori_img + for i, box in enumerate(bbox): + x1, y1, x2, y2 = [int(i) for i in box[:4]] + x1 += offset[0] + x2 += offset[0] + y1 += offset[1] + y2 += offset[1] + if len(box) > 4: + score = '{:.2f}'.format(box[4]) + else: + score = None + # box text and bar + id = int(identities[i]) if identities is not None else 0 + color = COLORS_10[id % len(COLORS_10)] + label = '{:d}'.format(id) + # t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2 , 2)[0] + img = plot_one_box([x1, y1, x2, y2], img, color, label, score=score) + return img + + +def draw_points(img: np.ndarray, points: np.ndarray, color=(255, 255, 255)) 
-> np.ndarray: + assert len(points.shape) == 2 and points.shape[1] == 2, 'invalid points shape: {}'.format(points.shape) + for i, (x, y) in enumerate(points): + if i >= 300: + color = (0, 255, 0) + cv2.circle(img, (int(x), int(y)), 2, color=color, thickness=2) + return img + + +def tensor_to_numpy(tensor: torch.Tensor) -> np.ndarray: + return tensor.detach().cpu().numpy() + + +class Track(object): + track_cnt = 0 + + def __init__(self, box): + self.box = box + self.time_since_update = 0 + self.id = Track.track_cnt + Track.track_cnt += 1 + self.miss = 0 + + def miss_one_frame(self): + self.miss += 1 + + def clear_miss(self): + self.miss = 0 + + def update(self, box): + self.box = box + self.clear_miss() + + +def write_results(filename, results): + save_format = '{frame},{id},{x1},{y1},{w},{h},{s},-1,-1,-1\n' + with open(filename, 'w') as f: + for frame_id, tlwhs, track_ids, scores in results: + for tlwh, track_id, score in zip(tlwhs, track_ids, scores): + if track_id < 0: + continue + x1, y1, w, h = tlwh + line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1), h=round(h, 1), s=round(score, 2)) + f.write(line) + logger.info('save results to {}'.format(filename)) + + +class MOTR(object): + def __init__(self, max_age=1, min_hits=3, iou_threshold=0.3): + self.tracker = BYTETracker() + + def update(self, dt_instances: Instances): + ret = [] + for i in range(len(dt_instances)): + label = dt_instances.labels[i] + if label == 0: + id = dt_instances.obj_idxes[i] + box_with_score = np.concatenate([dt_instances.boxes[i], dt_instances.scores[i:i+1]], axis=-1) + ret.append(np.concatenate((box_with_score, [id + 1])).reshape(1, -1)) # +1 as MOT benchmark requires positive + + if len(ret) > 0: + online_targets = self.tracker.update(np.concatenate(ret)) + + online_ret = [] + for t in online_targets: + online_ret.append(np.array([t.tlbr[0], t.tlbr[1], t.tlbr[2], t.tlbr[3], t.score, t.track_id]).reshape(1, -1)) + + if len(online_ret) > 0: + return np.concatenate(online_ret) + + return np.empty((0, 6)) + + + +def load_label(label_path: str, img_size: tuple) -> dict: + labels0 = np.loadtxt(label_path, dtype=np.float32).reshape(-1, 6) + h, w = img_size + # Normalized cewh to pixel xyxy format + labels = labels0.copy() + labels[:, 2] = w * (labels0[:, 2] - labels0[:, 4] / 2) + labels[:, 3] = h * (labels0[:, 3] - labels0[:, 5] / 2) + labels[:, 4] = w * (labels0[:, 2] + labels0[:, 4] / 2) + labels[:, 5] = h * (labels0[:, 3] + labels0[:, 5] / 2) + targets = {'boxes': [], 'labels': [], 'area': []} + num_boxes = len(labels) + + visited_ids = set() + for label in labels[:num_boxes]: + obj_id = label[1] + if obj_id in visited_ids: + continue + visited_ids.add(obj_id) + targets['boxes'].append(label[2:6].tolist()) + targets['area'].append(label[4] * label[5]) + targets['labels'].append(0) + targets['boxes'] = np.asarray(targets['boxes']) + targets['area'] = np.asarray(targets['area']) + targets['labels'] = np.asarray(targets['labels']) + return targets + + +def filter_pub_det(res_file, pub_det_file, filter_iou=False): + frame_boxes = {} + with open(pub_det_file, 'r') as f: + lines = f.readlines() + for line in lines: + if len(line) == 0: + continue + elements = line.strip().split(',') + frame_id = int(elements[0]) + x1, y1, w, h = elements[2:6] + x1, y1, w, h = float(x1), float(y1), float(w), float(h) + x2 = x1 + w - 1 + y2 = y1 + h - 1 + if frame_id not in frame_boxes: + frame_boxes[frame_id] = [] + frame_boxes[frame_id].append([x1, y1, x2, y2]) + + for frame, boxes in 
frame_boxes.items(): + frame_boxes[frame] = np.array(boxes) + + ids = {} + num_filter_box = 0 + with open(res_file, 'r') as f: + lines = list(f.readlines()) + with open(res_file, 'w') as f: + for line in lines: + if len(line) == 0: + continue + elements = line.strip().split(',') + frame_id, obj_id = elements[:2] + frame_id = int(frame_id) + obj_id = int(obj_id) + x1, y1, w, h = elements[2:6] + x1, y1, w, h = float(x1), float(y1), float(w), float(h) + x2 = x1 + w - 1 + y2 = y1 + h - 1 + if obj_id not in ids: + # track initialization. + if frame_id not in frame_boxes: + num_filter_box += 1 + print("filter init box {} {}".format(frame_id, obj_id)) + continue + pub_dt_boxes = frame_boxes[frame_id] + dt_box = np.array([[x1, y1, x2, y2]]) + if filter_iou: + max_iou = bbox_iou(dt_box, pub_dt_boxes).max() + if max_iou < 0.5: + num_filter_box += 1 + print("filter init box {} {}".format(frame_id, obj_id)) + continue + else: + pub_dt_centers = (pub_dt_boxes[:, :2] + pub_dt_boxes[:, 2:4]) * 0.5 + x_inside = (dt_box[0, 0] <= pub_dt_centers[:, 0]) & (dt_box[0, 2] >= pub_dt_centers[:, 0]) + y_inside = (dt_box[0, 1] <= pub_dt_centers[:, 1]) & (dt_box[0, 3] >= pub_dt_centers[:, 1]) + center_inside: np.ndarray = x_inside & y_inside + if not center_inside.any(): + num_filter_box += 1 + print("filter init box {} {}".format(frame_id, obj_id)) + continue + print("save init track {} {}".format(frame_id, obj_id)) + ids[obj_id] = True + f.write(line) + + print("totally {} boxes are filtered.".format(num_filter_box)) + + +class Detector(object): + def __init__(self, args, model=None, seq_num=2): + + self.args = args + self.detr = model + + self.seq_num = seq_num + img_list = os.listdir(os.path.join(self.args.mot_path, self.seq_num, 'img1')) + img_list = [os.path.join(self.args.mot_path, self.seq_num, 'img1', _) for _ in img_list if + ('jpg' in _) or ('png' in _)] + + self.img_list = sorted(img_list) + self.img_len = len(self.img_list) + self.tr_tracker = MOTR() + + ''' + common settings + ''' + self.img_height = 800 + self.img_width = 1536 + self.mean = [0.485, 0.456, 0.406] + self.std = [0.229, 0.224, 0.225] + + self.save_path = os.path.join(self.args.output_dir, 'results/{}'.format(seq_num)) + os.makedirs(self.save_path, exist_ok=True) + + self.predict_path = os.path.join(self.args.output_dir, 'preds', self.seq_num) + os.makedirs(self.predict_path, exist_ok=True) + if os.path.exists(os.path.join(self.predict_path, 'gt.txt')): + os.remove(os.path.join(self.predict_path, 'gt.txt')) + + def load_img_from_file(self,f_path): + label_path = f_path.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt') + cur_img = cv2.imread(f_path) + cur_img = cv2.cvtColor(cur_img, cv2.COLOR_BGR2RGB) + targets = load_label(label_path, cur_img.shape[:2]) if os.path.exists(label_path) else None + return cur_img, targets + + def init_img(self, img): + ori_img = img.copy() + self.seq_h, self.seq_w = img.shape[:2] + scale = self.img_height / min(self.seq_h, self.seq_w) + if max(self.seq_h, self.seq_w) * scale > self.img_width: + scale = self.img_width / max(self.seq_h, self.seq_w) + target_h = int(self.seq_h * scale) + target_w = int(self.seq_w * scale) + img = cv2.resize(img, (target_w, target_h)) + img = F.normalize(F.to_tensor(img), self.mean, self.std) + img = img.unsqueeze(0) + return img, ori_img + + @staticmethod + def filter_dt_by_score(dt_instances: Instances, prob_threshold: float) -> Instances: + keep = dt_instances.scores > prob_threshold + return dt_instances[keep] + + @staticmethod + def 
filter_dt_by_area(dt_instances: Instances, area_threshold: float) -> Instances: + wh = dt_instances.boxes[:, 2:4] - dt_instances.boxes[:, 0:2] + areas = wh[:, 0] * wh[:, 1] + keep = areas > area_threshold + return dt_instances[keep] + + @staticmethod + def write_results(txt_path, frame_id, bbox_xyxy, identities): + save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' + with open(txt_path, 'a') as f: + for xyxy, track_id in zip(bbox_xyxy, identities): + if track_id < 0 or track_id is None: + continue + x1, y1, x2, y2 = xyxy + w, h = x2 - x1, y2 - y1 + line = save_format.format(frame=int(frame_id), id=int(track_id), x1=x1, y1=y1, w=w, h=h) + f.write(line) + + def eval_seq(self): + data_root = os.path.join(self.args.mot_path) + result_filename = os.path.join(self.predict_path, 'gt.txt') + evaluator = Evaluator(data_root, self.seq_num) + accs = evaluator.eval_file(result_filename) + return accs + + @staticmethod + def visualize_img_with_bbox(img_path, img, dt_instances: Instances, ref_pts=None, gt_boxes=None): + img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) + if dt_instances.has('scores'): + img_show = draw_bboxes(img, np.concatenate([dt_instances.boxes, dt_instances.scores.reshape(-1, 1)], axis=-1), dt_instances.obj_idxes) + else: + img_show = draw_bboxes(img, dt_instances.boxes, dt_instances.obj_idxes) +# if ref_pts is not None: +# img_show = draw_points(img_show, ref_pts) +# if gt_boxes is not None: +# img_show = draw_bboxes(img_show, gt_boxes, identities=np.ones((len(gt_boxes), )) * -1) + cv2.imwrite(img_path, img_show) + + def detect(self, prob_threshold=0.2, area_threshold=100, vis=False): + total_dts = 0 + track_instances = None + max_id = 0 + + # we only consider val split (second half images) + for i in tqdm(range((int(self.img_len / 2)), self.img_len)): +# for i in tqdm(range(0, self.img_len)): + img, targets = self.load_img_from_file(self.img_list[i]) + cur_img, ori_img = self.init_img(img) + + # track_instances = None + if track_instances is not None: + track_instances.remove('boxes') + track_instances.remove('labels') + + res = self.detr.inference_single_image(cur_img.cuda().float(), (self.seq_h, self.seq_w), track_instances) + track_instances = res['track_instances'] + max_id = max(max_id, track_instances.obj_idxes.max().item()) + + print("ref points.shape={}".format(res['ref_pts'].shape)) + all_ref_pts = tensor_to_numpy(res['ref_pts'][0, :, :2]) + dt_instances = track_instances.to(torch.device('cpu')) + + # filter det instances by score. 
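# Illustrative aside, not part of the patch: filter_dt_by_score/filter_dt_by_area
# just boolean-mask the detectron2 Instances. A numpy-only equivalent with
# made-up boxes and the same default thresholds (0.2 score, 100 px^2 area):
import numpy as np

boxes = np.array([[10., 10., 50., 90.], [0., 0., 5., 5.]])  # xyxy
scores = np.array([0.85, 0.10])
keep = scores > 0.2                                          # score filter
wh = boxes[:, 2:4] - boxes[:, 0:2]
keep &= (wh[:, 0] * wh[:, 1]) > 100                          # area filter
boxes, scores = boxes[keep], scores[keep]                    # only the first box survives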
+ dt_instances = self.filter_dt_by_score(dt_instances, prob_threshold) + dt_instances = self.filter_dt_by_area(dt_instances, area_threshold) + + total_dts += len(dt_instances) + + if vis: + # for visual + cur_vis_img_path = os.path.join(self.save_path, 'frame_{:0>8d}.jpg'.format(i)) + gt_boxes = None + self.visualize_img_with_bbox(cur_vis_img_path, ori_img, dt_instances, ref_pts=all_ref_pts, gt_boxes=gt_boxes) + + tracker_outputs = self.tr_tracker.update(dt_instances) + + self.write_results(txt_path=os.path.join(self.predict_path, 'gt.txt'), + frame_id=(i + 1), + bbox_xyxy=tracker_outputs[:, :4], + identities=tracker_outputs[:, 5]) + print("totally {} dts max_id={}".format(total_dts, max_id)) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser('DETR training and evaluation script', parents=[get_args_parser()]) + args = parser.parse_args() + if args.output_dir: + Path(args.output_dir).mkdir(parents=True, exist_ok=True) + + # load model and weights + detr, _, _ = build_model(args) + checkpoint = torch.load(args.resume, map_location='cpu') + detr = load_model(detr, args.resume) + detr = detr.cuda() + detr.eval() + +# seq_nums = ['ADL-Rundle-6', 'ETH-Bahnhof', 'KITTI-13', 'PETS09-S2L1', 'TUD-Stadtmitte', 'ADL-Rundle-8', 'KITTI-17', +# 'ETH-Pedcross2', 'ETH-Sunnyday', 'TUD-Campus', 'Venice-2'] + seq_nums = ['MOT17-02-SDP', + 'MOT17-04-SDP', + 'MOT17-05-SDP', + 'MOT17-09-SDP', + 'MOT17-10-SDP', + 'MOT17-11-SDP', + 'MOT17-13-SDP'] + accs = [] + seqs = [] + + for seq_num in seq_nums: + print("solve {}".format(seq_num)) + det = Detector(args, model=detr, seq_num=seq_num) + det.detect(vis=False) + accs.append(det.eval_seq()) + seqs.append(seq_num) + + metrics = mm.metrics.motchallenge_metrics + mh = mm.metrics.create() + summary = Evaluator.get_summary(accs, seqs, metrics) + strsummary = mm.io.render_summary( + summary, + formatters=mh.formatters, + namemap=mm.io.motchallenge_metric_names + ) + print(strsummary) + with open("eval_log.txt", 'a') as f: + print(strsummary, file=f) diff --git a/tutorials/motr/evaluation.py b/tutorials/motr/evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..2be0d672e160e78361f94916e319cd5ee5f2310d --- /dev/null +++ b/tutorials/motr/evaluation.py @@ -0,0 +1,207 @@ +# ------------------------------------------------------------------------ +# Copyright (c) 2021 megvii-model. All Rights Reserved. +# ------------------------------------------------------------------------ +# Modified from Deformable DETR (https://github.com/fundamentalvision/Deformable-DETR) +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# ------------------------------------------------------------------------ + + +import os +import numpy as np +import copy +import motmetrics as mm +mm.lap.default_solver = 'lap' +import os +from typing import Dict +import numpy as np +import logging + +def read_results(filename, data_type: str, is_gt=False, is_ignore=False): + if data_type in ('mot', 'lab'): + read_fun = read_mot_results + else: + raise ValueError('Unknown data type: {}'.format(data_type)) + + return read_fun(filename, is_gt, is_ignore) + +# def read_mot_results(filename, is_gt, is_ignore): +# results_dict = dict() +# if os.path.isfile(filename): +# with open(filename, 'r') as f: +# for line in f.readlines(): +# linelist = line.split(',') +# if len(linelist) < 7: +# continue +# fid = int(linelist[0]) +# if fid < 1: +# continue +# results_dict.setdefault(fid, list()) + +# if is_gt: +# mark = int(float(linelist[6])) +# if mark == 0 : +# continue +# score = 1 +# elif is_ignore: +# score = 1 +# else: +# score = float(linelist[6]) + +# tlwh = tuple(map(float, linelist[2:6])) +# target_id = int(float(linelist[1])) +# results_dict[fid].append((tlwh, target_id, score)) + +# return results_dict + +def read_mot_results(filename, is_gt, is_ignore): + valid_labels = {1} + ignore_labels = {0, 2, 7, 8, 12} + results_dict = dict() + if os.path.isfile(filename): + with open(filename, 'r') as f: + for line in f.readlines(): + linelist = line.split(',') + if len(linelist) < 7: + continue + fid = int(linelist[0]) + if fid < 1: + continue + results_dict.setdefault(fid, list()) + + if is_gt: + if 'MOT16-' in filename or 'MOT17-' in filename: + label = int(float(linelist[7])) + mark = int(float(linelist[6])) + if mark == 0 or label not in valid_labels: + continue + score = 1 + elif is_ignore: + if 'MOT16-' in filename or 'MOT17-' in filename: + label = int(float(linelist[7])) + vis_ratio = float(linelist[8]) + if label not in ignore_labels and vis_ratio >= 0: + continue + elif 'MOT15' in filename: + label = int(float(linelist[6])) + if label not in ignore_labels: + continue + else: + continue + score = 1 + else: + score = float(linelist[6]) + + tlwh = tuple(map(float, linelist[2:6])) + target_id = int(linelist[1]) + + results_dict[fid].append((tlwh, target_id, score)) + + return results_dict + +def unzip_objs(objs): + if len(objs) > 0: + tlwhs, ids, scores = zip(*objs) + else: + tlwhs, ids, scores = [], [], [] + tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) + return tlwhs, ids, scores + + +class Evaluator(object): + def __init__(self, data_root, seq_name, data_type='mot'): + + self.data_root = data_root + self.seq_name = seq_name + self.data_type = data_type + + self.load_annotations() + self.reset_accumulator() + + def load_annotations(self): + assert self.data_type == 'mot' + + gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') + self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) + self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) + + def reset_accumulator(self): + self.acc = mm.MOTAccumulator(auto_id=True) + + def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): + # results + trk_tlwhs = np.copy(trk_tlwhs) + trk_ids = np.copy(trk_ids) + + # gts + gt_objs = self.gt_frame_dict.get(frame_id, []) + gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] + + # ignore boxes + ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) + ignore_tlwhs = unzip_objs(ignore_objs)[0] + # remove ignored results + keep = 
np.ones(len(trk_tlwhs), dtype=bool) + iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) + if len(iou_distance) > 0: + match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) + match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) + match_ious = iou_distance[match_is, match_js] + + match_js = np.asarray(match_js, dtype=int) + match_js = match_js[np.logical_not(np.isnan(match_ious))] + keep[match_js] = False + trk_tlwhs = trk_tlwhs[keep] + trk_ids = trk_ids[keep] + + # get distance matrix + iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) + + # acc + self.acc.update(gt_ids, trk_ids, iou_distance) + + if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): + events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics + else: + events = None + return events + + def eval_file(self, filename): + self.reset_accumulator() + + result_frame_dict = read_results(filename, self.data_type, is_gt=False) + #frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) + frames = sorted(list(set(result_frame_dict.keys()))) + + for frame_id in frames: + trk_objs = result_frame_dict.get(frame_id, []) + trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] + self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) + + return self.acc + + @staticmethod + def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): + names = copy.deepcopy(names) + if metrics is None: + metrics = mm.metrics.motchallenge_metrics + metrics = copy.deepcopy(metrics) + + mh = mm.metrics.create() + summary = mh.compute_many( + accs, + metrics=metrics, + names=names, + generate_overall=True + ) + + return summary + + @staticmethod + def save_summary(summary, filename): + import pandas as pd + writer = pd.ExcelWriter(filename) + summary.to_excel(writer) + writer.save() diff --git a/tutorials/motr/joint.py b/tutorials/motr/joint.py new file mode 100644 index 0000000000000000000000000000000000000000..65800a9ceff315f8733adbe967c9d8c0daa0f0e5 --- /dev/null +++ b/tutorials/motr/joint.py @@ -0,0 +1,292 @@ +# ------------------------------------------------------------------------ +# Copyright (c) 2021 megvii-model. All Rights Reserved. +# ------------------------------------------------------------------------ +# Modified from Deformable DETR (https://github.com/fundamentalvision/Deformable-DETR) +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ + +""" +MOT dataset which returns image_id for evaluation. 
+""" +from pathlib import Path +import cv2 +import numpy as np +import torch +import torch.utils.data +import os.path as osp +from PIL import Image, ImageDraw +import copy +import datasets.transforms as T +from models.structures import Instances + + +class DetMOTDetection: + def __init__(self, args, data_txt_path: str, seqs_folder, dataset2transform): + self.args = args + self.dataset2transform = dataset2transform + self.num_frames_per_batch = max(args.sampler_lengths) + self.sample_mode = args.sample_mode + self.sample_interval = args.sample_interval + self.vis = args.vis + self.video_dict = {} + + with open(data_txt_path, 'r') as file: + self.img_files = file.readlines() + self.img_files = [osp.join(seqs_folder, x.strip()) for x in self.img_files] + self.img_files = list(filter(lambda x: len(x) > 0, self.img_files)) + + self.label_files = [(x.replace('images', 'labels_with_ids').replace('.png', '.txt').replace('.jpg', '.txt')) + for x in self.img_files] + # The number of images per sample: 1 + (num_frames - 1) * interval. + # The number of valid samples: num_images - num_image_per_sample + 1. + self.item_num = len(self.img_files) - (self.num_frames_per_batch - 1) * self.sample_interval + + self._register_videos() + + # video sampler. + self.sampler_steps: list = args.sampler_steps + self.lengths: list = args.sampler_lengths + print("sampler_steps={} lenghts={}".format(self.sampler_steps, self.lengths)) + if self.sampler_steps is not None and len(self.sampler_steps) > 0: + # Enable sampling length adjustment. + assert len(self.lengths) > 0 + assert len(self.lengths) == len(self.sampler_steps) + 1 + for i in range(len(self.sampler_steps) - 1): + assert self.sampler_steps[i] < self.sampler_steps[i + 1] + self.item_num = len(self.img_files) - (self.lengths[-1] - 1) * self.sample_interval + self.period_idx = 0 + self.num_frames_per_batch = self.lengths[0] + self.current_epoch = 0 + + def _register_videos(self): + for label_name in self.label_files: + video_name = '/'.join(label_name.split('/')[:-1]) + if video_name not in self.video_dict: + print("register {}-th video: {} ".format(len(self.video_dict) + 1, video_name)) + self.video_dict[video_name] = len(self.video_dict) + # assert len(self.video_dict) <= 300 + + def set_epoch(self, epoch): + self.current_epoch = epoch + if self.sampler_steps is None or len(self.sampler_steps) == 0: + # fixed sampling length. + return + + for i in range(len(self.sampler_steps)): + if epoch >= self.sampler_steps[i]: + self.period_idx = i + 1 + print("set epoch: epoch {} period_idx={}".format(epoch, self.period_idx)) + self.num_frames_per_batch = self.lengths[self.period_idx] + + def step_epoch(self): + # one epoch finishes. 
+ print("Dataset: epoch {} finishes".format(self.current_epoch)) + self.set_epoch(self.current_epoch + 1) + + @staticmethod + def _targets_to_instances(targets: dict, img_shape) -> Instances: + gt_instances = Instances(tuple(img_shape)) + gt_instances.boxes = targets['boxes'] + gt_instances.labels = targets['labels'] + gt_instances.obj_ids = targets['obj_ids'] + gt_instances.area = targets['area'] + return gt_instances + + def _pre_single_frame(self, idx: int): + img_path = self.img_files[idx] + label_path = self.label_files[idx] + if 'crowdhuman' in img_path: + img_path = img_path.replace('.jpg', '.png') + img = Image.open(img_path) + targets = {} + w, h = img._size + assert w > 0 and h > 0, "invalid image {} with shape {} {}".format(img_path, w, h) + if osp.isfile(label_path): + labels0 = np.loadtxt(label_path, dtype=np.float32).reshape(-1, 6) + + # normalized cewh to pixel xyxy format + labels = labels0.copy() + labels[:, 2] = w * (labels0[:, 2] - labels0[:, 4] / 2) + labels[:, 3] = h * (labels0[:, 3] - labels0[:, 5] / 2) + labels[:, 4] = w * (labels0[:, 2] + labels0[:, 4] / 2) + labels[:, 5] = h * (labels0[:, 3] + labels0[:, 5] / 2) + else: + raise ValueError('invalid label path: {}'.format(label_path)) + video_name = '/'.join(label_path.split('/')[:-1]) + obj_idx_offset = self.video_dict[video_name] * 1000000 # 1000000 unique ids is enough for a video. + if 'crowdhuman' in img_path: + targets['dataset'] = 'CrowdHuman' + elif 'MOT17' in img_path: + targets['dataset'] = 'MOT17' + else: + raise NotImplementedError() + targets['boxes'] = [] + targets['area'] = [] + targets['iscrowd'] = [] + targets['labels'] = [] + targets['obj_ids'] = [] + targets['image_id'] = torch.as_tensor(idx) + targets['size'] = torch.as_tensor([h, w]) + targets['orig_size'] = torch.as_tensor([h, w]) + for label in labels: + targets['boxes'].append(label[2:6].tolist()) + targets['area'].append(label[4] * label[5]) + targets['iscrowd'].append(0) + targets['labels'].append(0) + obj_id = label[1] + obj_idx_offset if label[1] >= 0 else label[1] + targets['obj_ids'].append(obj_id) # relative id + + targets['area'] = torch.as_tensor(targets['area']) + targets['iscrowd'] = torch.as_tensor(targets['iscrowd']) + targets['labels'] = torch.as_tensor(targets['labels']) + targets['obj_ids'] = torch.as_tensor(targets['obj_ids']) + targets['boxes'] = torch.as_tensor(targets['boxes'], dtype=torch.float32).reshape(-1, 4) +# targets['boxes'][:, 0::2].clamp_(min=0, max=w) +# targets['boxes'][:, 1::2].clamp_(min=0, max=h) + return img, targets + + def _get_sample_range(self, start_idx): + + # take default sampling method for normal dataset. 
+ assert self.sample_mode in ['fixed_interval', 'random_interval'], 'invalid sample mode: {}'.format(self.sample_mode) + if self.sample_mode == 'fixed_interval': + sample_interval = self.sample_interval + elif self.sample_mode == 'random_interval': + sample_interval = np.random.randint(1, self.sample_interval + 1) + default_range = start_idx, start_idx + (self.num_frames_per_batch - 1) * sample_interval + 1, sample_interval + return default_range + + def pre_continuous_frames(self, start, end, interval=1): + targets = [] + images = [] + for i in range(start, end, interval): + img_i, targets_i = self._pre_single_frame(i) + images.append(img_i) + targets.append(targets_i) + return images, targets + + def __getitem__(self, idx): + sample_start, sample_end, sample_interval = self._get_sample_range(idx) + images, targets = self.pre_continuous_frames(sample_start, sample_end, sample_interval) + data = {} + dataset_name = targets[0]['dataset'] + transform = self.dataset2transform[dataset_name] + if transform is not None: + images, targets = transform(images, targets) + gt_instances = [] + for img_i, targets_i in zip(images, targets): + gt_instances_i = self._targets_to_instances(targets_i, img_i.shape[1:3]) + gt_instances.append(gt_instances_i) + data.update({ + 'imgs': images, + 'gt_instances': gt_instances, + }) + if self.args.vis: + data['ori_img'] = [target_i['ori_img'] for target_i in targets] + return data + + def __len__(self): + return self.item_num + + +class DetMOTDetectionValidation(DetMOTDetection): + def __init__(self, args, seqs_folder, dataset2transform): + args.data_txt_path = args.val_data_txt_path + super().__init__(args, seqs_folder, dataset2transform) + + + +def make_transforms_for_mot17(image_set, args=None): + + normalize = T.MotCompose([ + T.MotToTensor(), + T.MotNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) + scales = [608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992] + + if image_set == 'train': + return T.MotCompose([ + T.MotRandomHorizontalFlip(), + T.MotRandomSelect( + T.MotRandomResize(scales, max_size=1536), + T.MotCompose([ + T.MotRandomResize([400, 500, 600]), + T.FixedMotRandomCrop(384, 600), + T.MotRandomResize(scales, max_size=1536), + ]) + ), + normalize, + ]) + + if image_set == 'val': + return T.MotCompose([ + T.MotRandomResize([800], max_size=1333), + normalize, + ]) + + raise ValueError(f'unknown {image_set}') + + +def make_transforms_for_crowdhuman(image_set, args=None): + + normalize = T.MotCompose([ + T.MotToTensor(), + T.MotNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) + scales = [608, 640, 672, 704, 736, 768, 800, 832, 864, 896, 928, 960, 992] + + if image_set == 'train': + return T.MotCompose([ + T.MotRandomHorizontalFlip(), + T.FixedMotRandomShift(bs=1), + T.MotRandomSelect( + T.MotRandomResize(scales, max_size=1536), + T.MotCompose([ + T.MotRandomResize([400, 500, 600]), + T.FixedMotRandomCrop(384, 600), + T.MotRandomResize(scales, max_size=1536), + ]) + ), + normalize, + + ]) + + if image_set == 'val': + return T.MotCompose([ + T.MotRandomResize([800], max_size=1333), + normalize, + ]) + + raise ValueError(f'unknown {image_set}') + + +def build_dataset2transform(args, image_set): + mot17_train = make_transforms_for_mot17('train', args) + mot17_test = make_transforms_for_mot17('val', args) + + crowdhuman_train = make_transforms_for_crowdhuman('train', args) + dataset2transform_train = {'MOT17': mot17_train, 'CrowdHuman': crowdhuman_train} + dataset2transform_val = {'MOT17': mot17_test, 'CrowdHuman': 
mot17_test} + if image_set == 'train': + return dataset2transform_train + elif image_set == 'val': + return dataset2transform_val + else: + raise NotImplementedError() + + +def build(image_set, args): + root = Path(args.mot_path) + assert root.exists(), f'provided MOT path {root} does not exist' + dataset2transform = build_dataset2transform(args, image_set) + if image_set == 'train': + data_txt_path = args.data_txt_path_train + dataset = DetMOTDetection(args, data_txt_path=data_txt_path, seqs_folder=root, dataset2transform=dataset2transform) + if image_set == 'val': + data_txt_path = args.data_txt_path_val + dataset = DetMOTDetection(args, data_txt_path=data_txt_path, seqs_folder=root, dataset2transform=dataset2transform) + return dataset + diff --git a/tutorials/motr/mot_online/basetrack.py b/tutorials/motr/mot_online/basetrack.py new file mode 100644 index 0000000000000000000000000000000000000000..4fe2233607f6d4ed28b11a0ae6c0303c8ca19098 --- /dev/null +++ b/tutorials/motr/mot_online/basetrack.py @@ -0,0 +1,52 @@ +import numpy as np +from collections import OrderedDict + + +class TrackState(object): + New = 0 + Tracked = 1 + Lost = 2 + Removed = 3 + + +class BaseTrack(object): + _count = 0 + + track_id = 0 + is_activated = False + state = TrackState.New + + history = OrderedDict() + features = [] + curr_feature = None + score = 0 + start_frame = 0 + frame_id = 0 + time_since_update = 0 + + # multi-camera + location = (np.inf, np.inf) + + @property + def end_frame(self): + return self.frame_id + + @staticmethod + def next_id(): + BaseTrack._count += 1 + return BaseTrack._count + + def activate(self, *args): + raise NotImplementedError + + def predict(self): + raise NotImplementedError + + def update(self, *args, **kwargs): + raise NotImplementedError + + def mark_lost(self): + self.state = TrackState.Lost + + def mark_removed(self): + self.state = TrackState.Removed diff --git a/tutorials/motr/mot_online/kalman_filter.py b/tutorials/motr/mot_online/kalman_filter.py new file mode 100644 index 0000000000000000000000000000000000000000..82111a336d4d94bece171f2f95d9147bb7456285 --- /dev/null +++ b/tutorials/motr/mot_online/kalman_filter.py @@ -0,0 +1,252 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np +import scipy.linalg + +""" +Table for the 0.95 quantile of the chi-square distribution with N degrees of +freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv +function and used as Mahalanobis gating threshold. +""" +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919} + + +class KalmanFilter(object): + """ + A simple Kalman filter for tracking bounding boxes in image space. + The 8-dimensional state space + x, y, a, h, vx, vy, va, vh + contains the bounding box center position (x, y), aspect ratio a, height h, + and their respective velocities. + Object motion follows a constant velocity model. The bounding box location + (x, y, a, h) is taken as direct observation of the state space (linear + observation model). + """ + + def __init__(self): + ndim, dt = 4, 1. + + # Create Kalman filter model matrices. + self._motion_mat = np.eye(2 * ndim, 2 * ndim) + for i in range(ndim): + self._motion_mat[i, ndim + i] = dt + self._update_mat = np.eye(ndim, 2 * ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. These weights control the amount of uncertainty in + # the model. This is a bit hacky. + self._std_weight_position = 1. 
/ 20 + self._std_weight_velocity = 1. / 160 + + def initiate(self, measurement): + """Create track from unassociated measurement. + Parameters + ---------- + measurement : ndarray + Bounding box coordinates (x, y, a, h) with center position (x, y), + aspect ratio a, and height h. + Returns + ------- + (ndarray, ndarray) + Returns the mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are initialized + to 0 mean. + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + mean = np.r_[mean_pos, mean_vel] + + std = [ + 2 * self._std_weight_position * measurement[3], + 2 * self._std_weight_position * measurement[3], + 1e-2, + 2 * self._std_weight_position * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 1e-5, + 10 * self._std_weight_velocity * measurement[3]] + covariance = np.diag(np.square(std)) + return mean, covariance + + def predict(self, mean, covariance): + """Run Kalman filter prediction step. + Parameters + ---------- + mean : ndarray + The 8 dimensional mean vector of the object state at the previous + time step. + covariance : ndarray + The 8x8 dimensional covariance matrix of the object state at the + previous time step. + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + """ + std_pos = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-2, + self._std_weight_position * mean[3]] + std_vel = [ + self._std_weight_velocity * mean[3], + self._std_weight_velocity * mean[3], + 1e-5, + self._std_weight_velocity * mean[3]] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + #mean = np.dot(self._motion_mat, mean) + mean = np.dot(mean, self._motion_mat.T) + covariance = np.linalg.multi_dot(( + self._motion_mat, covariance, self._motion_mat.T)) + motion_cov + + return mean, covariance + + def project(self, mean, covariance): + """Project state distribution to measurement space. + Parameters + ---------- + mean : ndarray + The state's mean vector (8 dimensional array). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + Returns + ------- + (ndarray, ndarray) + Returns the projected mean and covariance matrix of the given state + estimate. + """ + std = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-1, + self._std_weight_position * mean[3]] + innovation_cov = np.diag(np.square(std)) + + mean = np.dot(self._update_mat, mean) + covariance = np.linalg.multi_dot(( + self._update_mat, covariance, self._update_mat.T)) + return mean, covariance + innovation_cov + + def multi_predict(self, mean, covariance): + """Run Kalman filter prediction step (Vectorized version). + Parameters + ---------- + mean : ndarray + The Nx8 dimensional mean matrix of the object states at the previous + time step. + covariance : ndarray + The Nx8x8 dimensional covariance matrics of the object states at the + previous time step. + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. 
+ """ + std_pos = [ + self._std_weight_position * mean[:, 3], + self._std_weight_position * mean[:, 3], + 1e-2 * np.ones_like(mean[:, 3]), + self._std_weight_position * mean[:, 3]] + std_vel = [ + self._std_weight_velocity * mean[:, 3], + self._std_weight_velocity * mean[:, 3], + 1e-5 * np.ones_like(mean[:, 3]), + self._std_weight_velocity * mean[:, 3]] + sqr = np.square(np.r_[std_pos, std_vel]).T + + motion_cov = [] + for i in range(len(mean)): + motion_cov.append(np.diag(sqr[i])) + motion_cov = np.asarray(motion_cov) + + mean = np.dot(mean, self._motion_mat.T) + left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) + covariance = np.dot(left, self._motion_mat.T) + motion_cov + + return mean, covariance + + def update(self, mean, covariance, measurement): + """Run Kalman filter correction step. + Parameters + ---------- + mean : ndarray + The predicted state's mean vector (8 dimensional). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + measurement : ndarray + The 4 dimensional measurement vector (x, y, a, h), where (x, y) + is the center position, a the aspect ratio, and h the height of the + bounding box. + Returns + ------- + (ndarray, ndarray) + Returns the measurement-corrected state distribution. + """ + projected_mean, projected_cov = self.project(mean, covariance) + + chol_factor, lower = scipy.linalg.cho_factor( + projected_cov, lower=True, check_finite=False) + kalman_gain = scipy.linalg.cho_solve( + (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, + check_finite=False).T + innovation = measurement - projected_mean + + new_mean = mean + np.dot(innovation, kalman_gain.T) + new_covariance = covariance - np.linalg.multi_dot(( + kalman_gain, projected_cov, kalman_gain.T)) + return new_mean, new_covariance + + def gating_distance(self, mean, covariance, measurements, + only_position=False, metric='maha'): + """Compute gating distance between state distribution and measurements. + A suitable distance threshold can be obtained from `chi2inv95`. If + `only_position` is False, the chi-square distribution has 4 degrees of + freedom, otherwise 2. + Parameters + ---------- + mean : ndarray + Mean vector over the state distribution (8 dimensional). + covariance : ndarray + Covariance of the state distribution (8x8 dimensional). + measurements : ndarray + An Nx4 dimensional matrix of N measurements, each in + format (x, y, a, h) where (x, y) is the bounding box center + position, a the aspect ratio, and h the height. + only_position : Optional[bool] + If True, distance computation is done with respect to the bounding + box center position only. + Returns + ------- + ndarray + Returns an array of length N, where the i-th element contains the + squared Mahalanobis distance between (mean, covariance) and + `measurements[i]`. 
+ """ + mean, covariance = self.project(mean, covariance) + if only_position: + mean, covariance = mean[:2], covariance[:2, :2] + measurements = measurements[:, :2] + + d = measurements - mean + if metric == 'gaussian': + return np.sum(d * d, axis=1) + elif metric == 'maha': + cholesky_factor = np.linalg.cholesky(covariance) + z = scipy.linalg.solve_triangular( + cholesky_factor, d.T, lower=True, check_finite=False, + overwrite_b=True) + squared_maha = np.sum(z * z, axis=0) + return squared_maha + else: + raise ValueError('invalid distance metric') diff --git a/tutorials/motr/mot_online/matching.py b/tutorials/motr/mot_online/matching.py new file mode 100644 index 0000000000000000000000000000000000000000..cc7abab60f86e5e84994071fc0ec0dd2f89c0377 --- /dev/null +++ b/tutorials/motr/mot_online/matching.py @@ -0,0 +1,196 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import lap +import numpy as np +import scipy +from cython_bbox import bbox_overlaps as bbox_ious +from scipy.spatial.distance import cdist + +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919} + +def merge_matches(m1, m2, shape): + O,P,Q = shape + m1 = np.asarray(m1) + m2 = np.asarray(m2) + + M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) + M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) + + mask = M1*M2 + match = mask.nonzero() + match = list(zip(match[0], match[1])) + unmatched_O = tuple(set(range(O)) - set([i for i, j in match])) + unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match])) + + return match, unmatched_O, unmatched_Q + + +def _indices_to_matches(cost_matrix, indices, thresh): + matched_cost = cost_matrix[tuple(zip(*indices))] + matched_mask = (matched_cost <= thresh) + + matches = indices[matched_mask] + unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0])) + unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1])) + + return matches, unmatched_a, unmatched_b + + +def linear_assignment(cost_matrix, thresh): + if cost_matrix.size == 0: + return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) + matches, unmatched_a, unmatched_b = [], [], [] + cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) + for ix, mx in enumerate(x): + if mx >= 0: + matches.append([ix, mx]) + unmatched_a = np.where(x < 0)[0] + unmatched_b = np.where(y < 0)[0] + matches = np.asarray(matches) + return matches, unmatched_a, unmatched_b + + +def ious(atlbrs, btlbrs): + """ + Compute cost based on IoU + :type atlbrs: list[tlbr] | np.ndarray + :type atlbrs: list[tlbr] | np.ndarray + :rtype ious np.ndarray + """ + ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) + if ious.size == 0: + return ious + + ious = bbox_ious( + np.ascontiguousarray(atlbrs, dtype=np.float), + np.ascontiguousarray(btlbrs, dtype=np.float) + ) + + return ious + + +def iou_distance(atracks, btracks): + """ + Compute cost based on IoU + :type atracks: list[STrack] + :type btracks: list[STrack] + :rtype cost_matrix np.ndarray + """ + + if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): + atlbrs = atracks + btlbrs = btracks + else: + atlbrs = [track.tlbr for track in atracks] + btlbrs = [track.tlbr for track in btracks] + _ious = ious(atlbrs, btlbrs) + cost_matrix = 
1 - _ious + + return cost_matrix + +def embedding_distance(tracks, detections, metric='cosine'): + """ + :param tracks: list[STrack] + :param detections: list[BaseTrack] + :param metric: + :return: cost_matrix np.ndarray + """ + + cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) + if cost_matrix.size == 0: + return cost_matrix + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + #for i, track in enumerate(tracks): + #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + return cost_matrix + +def embedding_distance2(tracks, detections, metric='cosine'): + """ + :param tracks: list[STrack] + :param detections: list[BaseTrack] + :param metric: + :return: cost_matrix np.ndarray + """ + + cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) + if cost_matrix.size == 0: + return cost_matrix + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + #for i, track in enumerate(tracks): + #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + track_features = np.asarray([track.features[0] for track in tracks], dtype=np.float) + cost_matrix2 = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + track_features = np.asarray([track.features[len(track.features)-1] for track in tracks], dtype=np.float) + cost_matrix3 = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + for row in range(len(cost_matrix)): + cost_matrix[row] = (cost_matrix[row]+cost_matrix2[row]+cost_matrix3[row])/3 + return cost_matrix + + +def vis_id_feature_A_distance(tracks, detections, metric='cosine'): + track_features = [] + det_features = [] + leg1 = len(tracks) + leg2 = len(detections) + cost_matrix = np.zeros((leg1, leg2), dtype=np.float) + cost_matrix_det = np.zeros((leg1, leg2), dtype=np.float) + cost_matrix_track = np.zeros((leg1, leg2), dtype=np.float) + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + if leg2 != 0: + cost_matrix_det = np.maximum(0.0, cdist(det_features, det_features, metric)) + if leg1 != 0: + cost_matrix_track = np.maximum(0.0, cdist(track_features, track_features, metric)) + if cost_matrix.size == 0: + return track_features, det_features, cost_matrix, cost_matrix_det, cost_matrix_track + cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) + if leg1 > 10: + leg1 = 10 + tracks = tracks[:10] + if leg2 > 10: + leg2 = 10 + detections = detections[:10] + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + return track_features, det_features, cost_matrix, cost_matrix_det, cost_matrix_track + +def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = chi2inv95[gating_dim] + measurements = 
np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position) + cost_matrix[row, gating_distance > gating_threshold] = np.inf + return cost_matrix + + +def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = chi2inv95[gating_dim] + measurements = np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position, metric='maha') + cost_matrix[row, gating_distance > gating_threshold] = np.inf + cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance + return cost_matrix diff --git a/tutorials/motr/motr.py b/tutorials/motr/motr.py new file mode 100644 index 0000000000000000000000000000000000000000..3e24b1d26318cd7d33a473198d743e9a9a69548f --- /dev/null +++ b/tutorials/motr/motr.py @@ -0,0 +1,676 @@ +# ------------------------------------------------------------------------ +# Copyright (c) 2021 megvii-model. All Rights Reserved. +# ------------------------------------------------------------------------ +# Modified from Deformable DETR (https://github.com/fundamentalvision/Deformable-DETR) +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# ------------------------------------------------------------------------ + +""" +DETR model and criterion classes. +""" +import copy +import math +import numpy as np +import torch +import torch.nn.functional as F +from torch import nn, Tensor +from typing import List + +from util import box_ops +from util.misc import (NestedTensor, nested_tensor_from_tensor_list, + accuracy, get_world_size, interpolate, get_rank, + is_dist_avail_and_initialized, inverse_sigmoid) + +from models.structures import Instances, Boxes, pairwise_iou, matched_boxlist_iou + +from .backbone import build_backbone +from .matcher import build_matcher +from .deformable_transformer_plus import build_deforamble_transformer +from .qim import build as build_query_interaction_layer +from .memory_bank import build_memory_bank +from .deformable_detr import SetCriterion, MLP +from .segmentation import sigmoid_focal_loss + + +class ClipMatcher(SetCriterion): + def __init__(self, num_classes, + matcher, + weight_dict, + losses): + """ Create the criterion. + Parameters: + num_classes: number of object categories, omitting the special no-object category + matcher: module able to compute a matching between targets and proposals + weight_dict: dict containing as key the names of the losses and as values their relative weight. + eos_coef: relative classification weight applied to the no-object category + losses: list of all the losses to be applied. See get_loss for list of available losses. 
+ """ + super().__init__(num_classes, matcher, weight_dict, losses) + self.num_classes = num_classes + self.matcher = matcher + self.weight_dict = weight_dict + self.losses = losses + self.focal_loss = True + self.losses_dict = {} + self._current_frame_idx = 0 + + def initialize_for_single_clip(self, gt_instances: List[Instances]): + self.gt_instances = gt_instances + self.num_samples = 0 + self.sample_device = None + self._current_frame_idx = 0 + self.losses_dict = {} + + def _step(self): + self._current_frame_idx += 1 + + def calc_loss_for_track_scores(self, track_instances: Instances): + frame_id = self._current_frame_idx - 1 + gt_instances = self.gt_instances[frame_id] + outputs = { + 'pred_logits': track_instances.track_scores[None], + } + device = track_instances.track_scores.device + + num_tracks = len(track_instances) + src_idx = torch.arange(num_tracks, dtype=torch.long, device=device) + tgt_idx = track_instances.matched_gt_idxes # -1 for FP tracks and disappeared tracks + + track_losses = self.get_loss('labels', + outputs=outputs, + gt_instances=[gt_instances], + indices=[(src_idx, tgt_idx)], + num_boxes=1) + self.losses_dict.update( + {'frame_{}_track_{}'.format(frame_id, key): value for key, value in + track_losses.items()}) + + def get_num_boxes(self, num_samples): + num_boxes = torch.as_tensor(num_samples, dtype=torch.float, device=self.sample_device) + if is_dist_avail_and_initialized(): + torch.distributed.all_reduce(num_boxes) + num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item() + return num_boxes + + def get_loss(self, loss, outputs, gt_instances, indices, num_boxes, **kwargs): + loss_map = { + 'labels': self.loss_labels, + 'cardinality': self.loss_cardinality, + 'boxes': self.loss_boxes, + } + assert loss in loss_map, f'do you really want to compute {loss} loss?' + return loss_map[loss](outputs, gt_instances, indices, num_boxes, **kwargs) + + def loss_boxes(self, outputs, gt_instances: List[Instances], indices: List[tuple], num_boxes): + """Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss + targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4] + The target boxes are expected in format (center_x, center_y, h, w), normalized by the image size. + """ + # We ignore the regression loss of the track-disappear slots. + #TODO: Make this filter process more elegant. 
+ filtered_idx = [] + for src_per_img, tgt_per_img in indices: + keep = tgt_per_img != -1 + filtered_idx.append((src_per_img[keep], tgt_per_img[keep])) + indices = filtered_idx + idx = self._get_src_permutation_idx(indices) + src_boxes = outputs['pred_boxes'][idx] + target_boxes = torch.cat([gt_per_img.boxes[i] for gt_per_img, (_, i) in zip(gt_instances, indices)], dim=0) + + # for pad target, don't calculate regression loss, judged by whether obj_id=-1 + target_obj_ids = torch.cat([gt_per_img.obj_ids[i] for gt_per_img, (_, i) in zip(gt_instances, indices)], dim=0) # size(16) + mask = (target_obj_ids != -1) + + loss_bbox = F.l1_loss(src_boxes[mask], target_boxes[mask], reduction='none') + loss_giou = 1 - torch.diag(box_ops.generalized_box_iou( + box_ops.box_cxcywh_to_xyxy(src_boxes[mask]), + box_ops.box_cxcywh_to_xyxy(target_boxes[mask]))) + + losses = {} + losses['loss_bbox'] = loss_bbox.sum() / num_boxes + losses['loss_giou'] = loss_giou.sum() / num_boxes + + return losses + + def loss_labels(self, outputs, gt_instances: List[Instances], indices, num_boxes, log=False): + """Classification loss (NLL) + targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes] + """ + src_logits = outputs['pred_logits'] + idx = self._get_src_permutation_idx(indices) + target_classes = torch.full(src_logits.shape[:2], self.num_classes, + dtype=torch.int64, device=src_logits.device) + # The matched gt for disappear track query is set -1. + labels = [] + for gt_per_img, (_, J) in zip(gt_instances, indices): + labels_per_img = torch.ones_like(J) + # set labels of track-appear slots to 0. + if len(gt_per_img) > 0: + labels_per_img[J != -1] = gt_per_img.labels[J[J != -1]] + labels.append(labels_per_img) + target_classes_o = torch.cat(labels) + target_classes[idx] = target_classes_o + if self.focal_loss: + gt_labels_target = F.one_hot(target_classes, num_classes=self.num_classes + 1)[:, :, :-1] # no loss for the last (background) class + gt_labels_target = gt_labels_target.to(src_logits) + loss_ce = sigmoid_focal_loss(src_logits.flatten(1), + gt_labels_target.flatten(1), + alpha=0.25, + gamma=2, + num_boxes=num_boxes, mean_in_dim1=False) + loss_ce = loss_ce.sum() + else: + loss_ce = F.cross_entropy(src_logits.transpose(1, 2), target_classes, self.empty_weight) + losses = {'loss_ce': loss_ce} + + if log: + # TODO this should probably be a separate loss, not hacked in this one here + losses['class_error'] = 100 - accuracy(src_logits[idx], target_classes_o)[0] + + return losses + + def match_for_single_frame(self, outputs: dict): + outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs'} + + gt_instances_i = self.gt_instances[self._current_frame_idx] # gt instances of i-th image. + track_instances: Instances = outputs_without_aux['track_instances'] + pred_logits_i = track_instances.pred_logits # predicted logits of i-th image. + pred_boxes_i = track_instances.pred_boxes # predicted boxes of i-th image. + + obj_idxes = gt_instances_i.obj_ids + obj_idxes_list = obj_idxes.detach().cpu().numpy().tolist() + obj_idx_to_gt_idx = {obj_idx: gt_idx for gt_idx, obj_idx in enumerate(obj_idxes_list)} + outputs_i = { + 'pred_logits': pred_logits_i.unsqueeze(0), + 'pred_boxes': pred_boxes_i.unsqueeze(0), + } + + # step1. inherit and update the previous tracks. + num_disappear_track = 0 + for j in range(len(track_instances)): + obj_id = track_instances.obj_idxes[j].item() + # set new target idx. 
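# Illustrative aside, not part of the patch: step1 re-links already-tracked
# queries to this frame's GT through their persistent object ids; ids missing
# from the frame are marked disappeared (-1). Toy values:
obj_ids_in_frame = [17, 42, 99]                    # gt_instances_i.obj_ids
obj_idx_to_gt_idx = {o: i for i, o in enumerate(obj_ids_in_frame)}
assert obj_idx_to_gt_idx.get(42, -1) == 1          # still visible -> gt index 1
assert obj_idx_to_gt_idx.get(7, -1) == -1          # not in this frame -> disappeared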
+ if obj_id >= 0: + if obj_id in obj_idx_to_gt_idx: + track_instances.matched_gt_idxes[j] = obj_idx_to_gt_idx[obj_id] + else: + num_disappear_track += 1 + track_instances.matched_gt_idxes[j] = -1 # track-disappear case. + else: + track_instances.matched_gt_idxes[j] = -1 + + full_track_idxes = torch.arange(len(track_instances), dtype=torch.long).to(pred_logits_i.device) + matched_track_idxes = (track_instances.obj_idxes >= 0) # occu + prev_matched_indices = torch.stack( + [full_track_idxes[matched_track_idxes], track_instances.matched_gt_idxes[matched_track_idxes]], dim=1).to( + pred_logits_i.device) + + # step2. select the unmatched slots. + # note that the FP tracks whose obj_idxes are -2 will not be selected here. + unmatched_track_idxes = full_track_idxes[track_instances.obj_idxes == -1] + + # step3. select the untracked gt instances (new tracks). + tgt_indexes = track_instances.matched_gt_idxes + tgt_indexes = tgt_indexes[tgt_indexes != -1] + + tgt_state = torch.zeros(len(gt_instances_i)).to(pred_logits_i.device) + tgt_state[tgt_indexes] = 1 + untracked_tgt_indexes = torch.arange(len(gt_instances_i)).to(pred_logits_i.device)[tgt_state == 0] + # untracked_tgt_indexes = select_unmatched_indexes(tgt_indexes, len(gt_instances_i)) + untracked_gt_instances = gt_instances_i[untracked_tgt_indexes] + + def match_for_single_decoder_layer(unmatched_outputs, matcher): + new_track_indices = matcher(unmatched_outputs, + [untracked_gt_instances]) # list[tuple(src_idx, tgt_idx)] + + src_idx = new_track_indices[0][0] + tgt_idx = new_track_indices[0][1] + # concat src and tgt. + new_matched_indices = torch.stack([unmatched_track_idxes[src_idx], untracked_tgt_indexes[tgt_idx]], + dim=1).to(pred_logits_i.device) + return new_matched_indices + + # step4. do matching between the unmatched slots and GTs. + unmatched_outputs = { + 'pred_logits': track_instances.pred_logits[unmatched_track_idxes].unsqueeze(0), + 'pred_boxes': track_instances.pred_boxes[unmatched_track_idxes].unsqueeze(0), + } + new_matched_indices = match_for_single_decoder_layer(unmatched_outputs, self.matcher) + + # step5. update obj_idxes according to the new matching result. + track_instances.obj_idxes[new_matched_indices[:, 0]] = gt_instances_i.obj_ids[new_matched_indices[:, 1]].long() + track_instances.matched_gt_idxes[new_matched_indices[:, 0]] = new_matched_indices[:, 1] + + # step6. calculate iou. + active_idxes = (track_instances.obj_idxes >= 0) & (track_instances.matched_gt_idxes >= 0) + active_track_boxes = track_instances.pred_boxes[active_idxes] + if len(active_track_boxes) > 0: + gt_boxes = gt_instances_i.boxes[track_instances.matched_gt_idxes[active_idxes]] + active_track_boxes = box_ops.box_cxcywh_to_xyxy(active_track_boxes) + gt_boxes = box_ops.box_cxcywh_to_xyxy(gt_boxes) + track_instances.iou[active_idxes] = matched_boxlist_iou(Boxes(active_track_boxes), Boxes(gt_boxes)) + + # step7. merge the unmatched pairs and the matched pairs. + matched_indices = torch.cat([new_matched_indices, prev_matched_indices], dim=0) + + # step8. calculate losses. 
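# Illustrative aside, not part of the patch: each frame's losses are stored under
# keys like 'frame_0_loss_ce' or 'frame_0_aux1_loss_bbox' and only normalized by
# the clip-wide sample count in ClipMatcher.forward(). Toy numbers:
losses_dict = {'frame_0_loss_ce': 4.0, 'frame_1_loss_ce': 2.0}
num_samples = 8   # sum of GT boxes plus disappeared tracks over the clip
normalized = {k: v / num_samples for k, v in losses_dict.items()}
assert normalized['frame_0_loss_ce'] == 0.5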
+ self.num_samples += len(gt_instances_i) + num_disappear_track + self.sample_device = pred_logits_i.device + for loss in self.losses: + new_track_loss = self.get_loss(loss, + outputs=outputs_i, + gt_instances=[gt_instances_i], + indices=[(matched_indices[:, 0], matched_indices[:, 1])], + num_boxes=1) + self.losses_dict.update( + {'frame_{}_{}'.format(self._current_frame_idx, key): value for key, value in new_track_loss.items()}) + + if 'aux_outputs' in outputs: + for i, aux_outputs in enumerate(outputs['aux_outputs']): + unmatched_outputs_layer = { + 'pred_logits': aux_outputs['pred_logits'][0, unmatched_track_idxes].unsqueeze(0), + 'pred_boxes': aux_outputs['pred_boxes'][0, unmatched_track_idxes].unsqueeze(0), + } + new_matched_indices_layer = match_for_single_decoder_layer(unmatched_outputs_layer, self.matcher) + matched_indices_layer = torch.cat([new_matched_indices_layer, prev_matched_indices], dim=0) + for loss in self.losses: + if loss == 'masks': + # Intermediate masks losses are too costly to compute, we ignore them. + continue + l_dict = self.get_loss(loss, + aux_outputs, + gt_instances=[gt_instances_i], + indices=[(matched_indices_layer[:, 0], matched_indices_layer[:, 1])], + num_boxes=1, ) + self.losses_dict.update( + {'frame_{}_aux{}_{}'.format(self._current_frame_idx, i, key): value for key, value in + l_dict.items()}) + self._step() + return track_instances + + def forward(self, outputs, input_data: dict): + # losses of each frame are calculated during the model's forwarding and are outputted by the model as outputs['losses_dict]. + losses = outputs.pop("losses_dict") + num_samples = self.get_num_boxes(self.num_samples) + for loss_name, loss in losses.items(): + losses[loss_name] /= num_samples + return losses + + +class RuntimeTrackerBase(object): + def __init__(self, score_thresh=0.8, filter_score_thresh=0.6, miss_tolerance=5): + self.score_thresh = score_thresh + self.filter_score_thresh = filter_score_thresh + self.miss_tolerance = miss_tolerance + self.max_obj_id = 0 + + def clear(self): + self.max_obj_id = 0 + + def update(self, track_instances: Instances): + track_instances.disappear_time[track_instances.scores >= self.score_thresh] = 0 + for i in range(len(track_instances)): + if track_instances.obj_idxes[i] == -1 and track_instances.scores[i] >= self.score_thresh: + # print("track {} has score {}, assign obj_id {}".format(i, track_instances.scores[i], self.max_obj_id)) + track_instances.obj_idxes[i] = self.max_obj_id + self.max_obj_id += 1 + elif track_instances.obj_idxes[i] >= 0 and track_instances.scores[i] < self.filter_score_thresh: + track_instances.disappear_time[i] += 1 + if track_instances.disappear_time[i] >= self.miss_tolerance: + # Set the obj_id to -1. + # Then this track will be removed by TrackEmbeddingLayer. 
+ track_instances.obj_idxes[i] = -1 + + +class TrackerPostProcess(nn.Module): + """ This module converts the model's output into the format expected by the coco api""" + def __init__(self): + super().__init__() + + @torch.no_grad() + def forward(self, track_instances: Instances, target_size) -> Instances: + """ Perform the computation + Parameters: + outputs: raw outputs of the model + target_sizes: tensor of dimension [batch_size x 2] containing the size of each images of the batch + For evaluation, this must be the original image size (before any data augmentation) + For visualization, this should be the image size after data augment, but before padding + """ + out_logits = track_instances.pred_logits + out_bbox = track_instances.pred_boxes + + prob = out_logits.sigmoid() + # prob = out_logits[...,:1].sigmoid() + scores, labels = prob.max(-1) + + # convert to [x0, y0, x1, y1] format + boxes = box_ops.box_cxcywh_to_xyxy(out_bbox) + # and from relative [0, 1] to absolute [0, height] coordinates + img_h, img_w = target_size + scale_fct = torch.Tensor([img_w, img_h, img_w, img_h]).to(boxes) + boxes = boxes * scale_fct[None, :] + + track_instances.boxes = boxes + track_instances.scores = scores + track_instances.labels = labels +# track_instances.remove('pred_logits') +# track_instances.remove('pred_boxes') + return track_instances + + +def _get_clones(module, N): + return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) + + +class MOTR(nn.Module): + def __init__(self, backbone, transformer, num_classes, num_queries, num_feature_levels, criterion, track_embed, + aux_loss=True, with_box_refine=False, two_stage=False, memory_bank=None): + """ Initializes the model. + Parameters: + backbone: torch module of the backbone to be used. See backbone.py + transformer: torch module of the transformer architecture. See transformer.py + num_classes: number of object classes + num_queries: number of object queries, ie detection slot. This is the maximal number of objects + DETR can detect in a single image. For COCO, we recommend 100 queries. + aux_loss: True if auxiliary decoding losses (loss at each decoder layer) are to be used. 
+ with_box_refine: iterative bounding box refinement + two_stage: two-stage Deformable DETR + """ + super().__init__() + self.num_queries = num_queries + self.track_embed = track_embed + self.transformer = transformer + hidden_dim = transformer.d_model + self.num_classes = num_classes + self.class_embed = nn.Linear(hidden_dim, num_classes) + self.bbox_embed = MLP(hidden_dim, hidden_dim, 4, 3) + self.num_feature_levels = num_feature_levels + if not two_stage: + self.query_embed = nn.Embedding(num_queries, hidden_dim * 2) + if num_feature_levels > 1: + num_backbone_outs = len(backbone.strides) + input_proj_list = [] + for _ in range(num_backbone_outs): + in_channels = backbone.num_channels[_] + input_proj_list.append(nn.Sequential( + nn.Conv2d(in_channels, hidden_dim, kernel_size=1), + nn.GroupNorm(32, hidden_dim), + )) + for _ in range(num_feature_levels - num_backbone_outs): + input_proj_list.append(nn.Sequential( + nn.Conv2d(in_channels, hidden_dim, kernel_size=3, stride=2, padding=1), + nn.GroupNorm(32, hidden_dim), + )) + in_channels = hidden_dim + self.input_proj = nn.ModuleList(input_proj_list) + else: + self.input_proj = nn.ModuleList([ + nn.Sequential( + nn.Conv2d(backbone.num_channels[0], hidden_dim, kernel_size=1), + nn.GroupNorm(32, hidden_dim), + )]) + self.backbone = backbone + self.aux_loss = aux_loss + self.with_box_refine = with_box_refine + self.two_stage = two_stage + + prior_prob = 0.01 + bias_value = -math.log((1 - prior_prob) / prior_prob) + self.class_embed.bias.data = torch.ones(num_classes) * bias_value + nn.init.constant_(self.bbox_embed.layers[-1].weight.data, 0) + nn.init.constant_(self.bbox_embed.layers[-1].bias.data, 0) + for proj in self.input_proj: + nn.init.xavier_uniform_(proj[0].weight, gain=1) + nn.init.constant_(proj[0].bias, 0) + + # if two-stage, the last class_embed and bbox_embed is for region proposal generation + num_pred = (transformer.decoder.num_layers + 1) if two_stage else transformer.decoder.num_layers + if with_box_refine: + self.class_embed = _get_clones(self.class_embed, num_pred) + self.bbox_embed = _get_clones(self.bbox_embed, num_pred) + nn.init.constant_(self.bbox_embed[0].layers[-1].bias.data[2:], -2.0) + # hack implementation for iterative bounding box refinement + self.transformer.decoder.bbox_embed = self.bbox_embed + else: + nn.init.constant_(self.bbox_embed.layers[-1].bias.data[2:], -2.0) + self.class_embed = nn.ModuleList([self.class_embed for _ in range(num_pred)]) + self.bbox_embed = nn.ModuleList([self.bbox_embed for _ in range(num_pred)]) + self.transformer.decoder.bbox_embed = None + if two_stage: + # hack implementation for two-stage + self.transformer.decoder.class_embed = self.class_embed + for box_embed in self.bbox_embed: + nn.init.constant_(box_embed.layers[-1].bias.data[2:], 0.0) + self.post_process = TrackerPostProcess() + self.track_base = RuntimeTrackerBase() + self.criterion = criterion + self.memory_bank = memory_bank + self.mem_bank_len = 0 if memory_bank is None else memory_bank.max_his_length + + def _generate_empty_tracks(self): + track_instances = Instances((1, 1)) + num_queries, dim = self.query_embed.weight.shape # (300, 512) + device = self.query_embed.weight.device + track_instances.ref_pts = self.transformer.reference_points(self.query_embed.weight[:, :dim // 2]) + track_instances.query_pos = self.query_embed.weight + track_instances.output_embedding = torch.zeros((num_queries, dim >> 1), device=device) + track_instances.obj_idxes = torch.full((len(track_instances),), -1, dtype=torch.long, 
device=device) + track_instances.matched_gt_idxes = torch.full((len(track_instances),), -1, dtype=torch.long, device=device) + track_instances.disappear_time = torch.zeros((len(track_instances), ), dtype=torch.long, device=device) + track_instances.iou = torch.zeros((len(track_instances),), dtype=torch.float, device=device) + track_instances.scores = torch.zeros((len(track_instances),), dtype=torch.float, device=device) + track_instances.track_scores = torch.zeros((len(track_instances),), dtype=torch.float, device=device) + track_instances.pred_boxes = torch.zeros((len(track_instances), 4), dtype=torch.float, device=device) + track_instances.pred_logits = torch.zeros((len(track_instances), self.num_classes), dtype=torch.float, device=device) + + mem_bank_len = self.mem_bank_len + track_instances.mem_bank = torch.zeros((len(track_instances), mem_bank_len, dim // 2), dtype=torch.float32, device=device) + track_instances.mem_padding_mask = torch.ones((len(track_instances), mem_bank_len), dtype=torch.bool, device=device) + track_instances.save_period = torch.zeros((len(track_instances), ), dtype=torch.float32, device=device) + + return track_instances.to(self.query_embed.weight.device) + + def clear(self): + self.track_base.clear() + + @torch.jit.unused + def _set_aux_loss(self, outputs_class, outputs_coord): + # this is a workaround to make torchscript happy, as torchscript + # doesn't support dictionary with non-homogeneous values, such + # as a dict having both a Tensor and a list. + return [{'pred_logits': a, 'pred_boxes': b, } + for a, b in zip(outputs_class[:-1], outputs_coord[:-1])] + + def _forward_single_image(self, samples, track_instances: Instances): + features, pos = self.backbone(samples) + src, mask = features[-1].decompose() + assert mask is not None + + srcs = [] + masks = [] + for l, feat in enumerate(features): + src, mask = feat.decompose() + srcs.append(self.input_proj[l](src)) + masks.append(mask) + assert mask is not None + + if self.num_feature_levels > len(srcs): + _len_srcs = len(srcs) + for l in range(_len_srcs, self.num_feature_levels): + if l == _len_srcs: + src = self.input_proj[l](features[-1].tensors) + else: + src = self.input_proj[l](srcs[-1]) + m = samples.mask + mask = F.interpolate(m[None].float(), size=src.shape[-2:]).to(torch.bool)[0] + pos_l = self.backbone[1](NestedTensor(src, mask)).to(src.dtype) + srcs.append(src) + masks.append(mask) + pos.append(pos_l) + + hs, init_reference, inter_references, enc_outputs_class, enc_outputs_coord_unact = self.transformer(srcs, masks, pos, track_instances.query_pos, ref_pts=track_instances.ref_pts) + + outputs_classes = [] + outputs_coords = [] + for lvl in range(hs.shape[0]): + if lvl == 0: + reference = init_reference + else: + reference = inter_references[lvl - 1] + reference = inverse_sigmoid(reference) + outputs_class = self.class_embed[lvl](hs[lvl]) + tmp = self.bbox_embed[lvl](hs[lvl]) + if reference.shape[-1] == 4: + tmp += reference + else: + assert reference.shape[-1] == 2 + tmp[..., :2] += reference + outputs_coord = tmp.sigmoid() + outputs_classes.append(outputs_class) + outputs_coords.append(outputs_coord) + outputs_class = torch.stack(outputs_classes) + outputs_coord = torch.stack(outputs_coords) + + ref_pts_all = torch.cat([init_reference[None], inter_references[:, :, :, :2]], dim=0) + out = {'pred_logits': outputs_class[-1], 'pred_boxes': outputs_coord[-1], 'ref_pts': ref_pts_all[5]} + if self.aux_loss: + out['aux_outputs'] = self._set_aux_loss(outputs_class, outputs_coord) + + with torch.no_grad(): + 
if self.training: + track_scores = outputs_class[-1, 0, :].sigmoid().max(dim=-1).values + else: + track_scores = outputs_class[-1, 0, :, 0].sigmoid() + + track_instances.scores = track_scores + track_instances.pred_logits = outputs_class[-1, 0] + track_instances.pred_boxes = outputs_coord[-1, 0] + track_instances.output_embedding = hs[-1, 0] + if self.training: + # the track id will be assigned by the matcher. + out['track_instances'] = track_instances + track_instances = self.criterion.match_for_single_frame(out) + else: + # each track will be assigned a unique global id by the track base. + self.track_base.update(track_instances) + if self.memory_bank is not None: + track_instances = self.memory_bank(track_instances) + # track_instances.track_scores = track_instances.track_scores[..., 0] + # track_instances.scores = track_instances.track_scores.sigmoid() + if self.training: + self.criterion.calc_loss_for_track_scores(track_instances) + tmp = {} + tmp['init_track_instances'] = self._generate_empty_tracks() + tmp['track_instances'] = track_instances + out_track_instances = self.track_embed(tmp) + out['track_instances'] = out_track_instances + return out + + @torch.no_grad() + def inference_single_image(self, img, ori_img_size, track_instances=None): + if not isinstance(img, NestedTensor): + img = nested_tensor_from_tensor_list(img) + if track_instances is None: + track_instances = self._generate_empty_tracks() + + res = self._forward_single_image(img, track_instances=track_instances) + + track_instances = res['track_instances'] + track_instances = self.post_process(track_instances, ori_img_size) + ret = {'track_instances': track_instances} + if 'ref_pts' in res: + ref_pts = res['ref_pts'] + img_h, img_w = ori_img_size + scale_fct = torch.Tensor([img_w, img_h]).to(ref_pts) + ref_pts = ref_pts * scale_fct[None] + ret['ref_pts'] = ref_pts + return ret + + def forward(self, data: dict): + if self.training: + self.criterion.initialize_for_single_clip(data['gt_instances']) + frames = data['imgs'] # list of Tensor.
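+ # A rough sketch of the clip-level input expected here (shapes are illustrative assumptions; the actual dataloader may differ): + # data = {'imgs': [Tensor[3, H, W] per frame, ...], 'gt_instances': [Instances per frame, ...] (training only)}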
+ outputs = { + 'pred_logits': [], + 'pred_boxes': [], + } + + track_instances = self._generate_empty_tracks() + for frame in frames: + if not isinstance(frame, NestedTensor): + frame = nested_tensor_from_tensor_list([frame]) + frame_res = self._forward_single_image(frame, track_instances) + track_instances = frame_res['track_instances'] + outputs['pred_logits'].append(frame_res['pred_logits']) + outputs['pred_boxes'].append(frame_res['pred_boxes']) + + if not self.training: + outputs['track_instances'] = track_instances + else: + outputs['losses_dict'] = self.criterion.losses_dict + return outputs + + +def build(args): + dataset_to_num_classes = { + 'coco': 91, + 'coco_panoptic': 250, + 'e2e_mot': 1, + 'e2e_joint': 1, + 'e2e_static_mot': 1 + } + assert args.dataset_file in dataset_to_num_classes + num_classes = dataset_to_num_classes[args.dataset_file] + device = torch.device(args.device) + + backbone = build_backbone(args) + + transformer = build_deforamble_transformer(args) + d_model = transformer.d_model + hidden_dim = args.dim_feedforward + query_interaction_layer = build_query_interaction_layer(args, args.query_interaction_layer, d_model, hidden_dim, d_model*2) + + img_matcher = build_matcher(args) + num_frames_per_batch = max(args.sampler_lengths) + weight_dict = {} + for i in range(num_frames_per_batch): + weight_dict.update({"frame_{}_loss_ce".format(i): args.cls_loss_coef, + 'frame_{}_loss_bbox'.format(i): args.bbox_loss_coef, + 'frame_{}_loss_giou'.format(i): args.giou_loss_coef, + }) + + # TODO this is a hack + if args.aux_loss: + for i in range(num_frames_per_batch): + for j in range(args.dec_layers - 1): + weight_dict.update({"frame_{}_aux{}_loss_ce".format(i, j): args.cls_loss_coef, + 'frame_{}_aux{}_loss_bbox'.format(i, j): args.bbox_loss_coef, + 'frame_{}_aux{}_loss_giou'.format(i, j): args.giou_loss_coef, + }) + if args.memory_bank_type is not None and len(args.memory_bank_type) > 0: + memory_bank = build_memory_bank(args, d_model, hidden_dim, d_model * 2) + for i in range(num_frames_per_batch): + weight_dict.update({"frame_{}_track_loss_ce".format(i): args.cls_loss_coef}) + else: + memory_bank = None + losses = ['labels', 'boxes'] + criterion = ClipMatcher(num_classes, matcher=img_matcher, weight_dict=weight_dict, losses=losses) + criterion.to(device) + postprocessors = {} + model = MOTR( + backbone, + transformer, + track_embed=query_interaction_layer, + num_feature_levels=args.num_feature_levels, + num_classes=num_classes, + num_queries=args.num_queries, + aux_loss=args.aux_loss, + criterion=criterion, + with_box_refine=args.with_box_refine, + two_stage=args.two_stage, + memory_bank=memory_bank, + ) + return model, criterion, postprocessors diff --git a/tutorials/motr/motr_det.py b/tutorials/motr/motr_det.py new file mode 100644 index 0000000000000000000000000000000000000000..b9f74fdf8520385a79653a557631fa4a9ac1b9fc --- /dev/null +++ b/tutorials/motr/motr_det.py @@ -0,0 +1,677 @@ +# ------------------------------------------------------------------------ +# Copyright (c) 2021 megvii-model. All Rights Reserved. +# ------------------------------------------------------------------------ +# Modified from Deformable DETR (https://github.com/fundamentalvision/Deformable-DETR) +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# ------------------------------------------------------------------------ + +""" +DETR model and criterion classes. +""" +import copy +import math +import numpy as np +import torch +import torch.nn.functional as F +from torch import nn, Tensor +from typing import List + +from util import box_ops +from util.misc import (NestedTensor, nested_tensor_from_tensor_list, + accuracy, get_world_size, interpolate, get_rank, + is_dist_avail_and_initialized, inverse_sigmoid) + +from models.structures import Instances, Boxes, pairwise_iou, matched_boxlist_iou + +from .backbone import build_backbone +from .matcher import build_matcher +from .deformable_transformer_plus import build_deforamble_transformer +from .qim import build as build_query_interaction_layer +from .memory_bank import build_memory_bank +from .deformable_detr import SetCriterion, MLP +from .segmentation import sigmoid_focal_loss + + +class ClipMatcher(SetCriterion): + def __init__(self, num_classes, + matcher, + weight_dict, + losses): + """ Create the criterion. + Parameters: + num_classes: number of object categories, omitting the special no-object category + matcher: module able to compute a matching between targets and proposals + weight_dict: dict containing as key the names of the losses and as values their relative weight. + eos_coef: relative classification weight applied to the no-object category + losses: list of all the losses to be applied. See get_loss for list of available losses. + """ + super().__init__(num_classes, matcher, weight_dict, losses) + self.num_classes = num_classes + self.matcher = matcher + self.weight_dict = weight_dict + self.losses = losses + self.focal_loss = True + self.losses_dict = {} + self._current_frame_idx = 0 + + def initialize_for_single_clip(self, gt_instances: List[Instances]): + self.gt_instances = gt_instances + self.num_samples = 0 + self.sample_device = None + self._current_frame_idx = 0 + self.losses_dict = {} + + def _step(self): + self._current_frame_idx += 1 + + def calc_loss_for_track_scores(self, track_instances: Instances): + frame_id = self._current_frame_idx - 1 + gt_instances = self.gt_instances[frame_id] + outputs = { + 'pred_logits': track_instances.track_scores[None], + } + device = track_instances.track_scores.device + + num_tracks = len(track_instances) + src_idx = torch.arange(num_tracks, dtype=torch.long, device=device) + tgt_idx = track_instances.matched_gt_idxes # -1 for FP tracks and disappeared tracks + + track_losses = self.get_loss('labels', + outputs=outputs, + gt_instances=[gt_instances], + indices=[(src_idx, tgt_idx)], + num_boxes=1) + self.losses_dict.update( + {'frame_{}_track_{}'.format(frame_id, key): value for key, value in + track_losses.items()}) + + def get_num_boxes(self, num_samples): + num_boxes = torch.as_tensor(num_samples, dtype=torch.float, device=self.sample_device) + if is_dist_avail_and_initialized(): + torch.distributed.all_reduce(num_boxes) + num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item() + return num_boxes + + def get_loss(self, loss, outputs, gt_instances, indices, num_boxes, **kwargs): + loss_map = { + 'labels': self.loss_labels, + 'cardinality': self.loss_cardinality, + 'boxes': self.loss_boxes, + } + assert loss in loss_map, f'do you really want to compute {loss} loss?' 
+ return loss_map[loss](outputs, gt_instances, indices, num_boxes, **kwargs) + + def loss_boxes(self, outputs, gt_instances: List[Instances], indices: List[tuple], num_boxes): + """Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss + targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4] + The target boxes are expected in format (center_x, center_y, h, w), normalized by the image size. + """ + # We ignore the regression loss of the track-disappear slots. + #TODO: Make this filter process more elegant. + filtered_idx = [] + for src_per_img, tgt_per_img in indices: + keep = tgt_per_img != -1 + filtered_idx.append((src_per_img[keep], tgt_per_img[keep])) + indices = filtered_idx + idx = self._get_src_permutation_idx(indices) + src_boxes = outputs['pred_boxes'][idx] + target_boxes = torch.cat([gt_per_img.boxes[i] for gt_per_img, (_, i) in zip(gt_instances, indices)], dim=0) + + # for pad target, don't calculate regression loss, judged by whether obj_id=-1 + target_obj_ids = torch.cat([gt_per_img.obj_ids[i] for gt_per_img, (_, i) in zip(gt_instances, indices)], dim=0) # size(16) + mask = (target_obj_ids != -1) + + loss_bbox = F.l1_loss(src_boxes[mask], target_boxes[mask], reduction='none') + loss_giou = 1 - torch.diag(box_ops.generalized_box_iou( + box_ops.box_cxcywh_to_xyxy(src_boxes[mask]), + box_ops.box_cxcywh_to_xyxy(target_boxes[mask]))) + + losses = {} + losses['loss_bbox'] = loss_bbox.sum() / num_boxes + losses['loss_giou'] = loss_giou.sum() / num_boxes + + return losses + + def loss_labels(self, outputs, gt_instances: List[Instances], indices, num_boxes, log=False): + """Classification loss (NLL) + targets dicts must contain the key "labels" containing a tensor of dim [nb_target_boxes] + """ + src_logits = outputs['pred_logits'] + idx = self._get_src_permutation_idx(indices) + target_classes = torch.full(src_logits.shape[:2], self.num_classes, + dtype=torch.int64, device=src_logits.device) + # The matched gt for disappear track query is set -1. + labels = [] + for gt_per_img, (_, J) in zip(gt_instances, indices): + labels_per_img = torch.ones_like(J) + # set labels of track-appear slots to 0. + if len(gt_per_img) > 0: + labels_per_img[J != -1] = gt_per_img.labels[J[J != -1]] + labels.append(labels_per_img) + target_classes_o = torch.cat(labels) + target_classes[idx] = target_classes_o + if self.focal_loss: + gt_labels_target = F.one_hot(target_classes, num_classes=self.num_classes + 1)[:, :, :-1] # no loss for the last (background) class + gt_labels_target = gt_labels_target.to(src_logits) + loss_ce = sigmoid_focal_loss(src_logits.flatten(1), + gt_labels_target.flatten(1), + alpha=0.25, + gamma=2, + num_boxes=num_boxes, mean_in_dim1=False) + loss_ce = loss_ce.sum() + else: + loss_ce = F.cross_entropy(src_logits.transpose(1, 2), target_classes, self.empty_weight) + losses = {'loss_ce': loss_ce} + + if log: + # TODO this should probably be a separate loss, not hacked in this one here + losses['class_error'] = 100 - accuracy(src_logits[idx], target_classes_o)[0] + + return losses + + def match_for_single_frame(self, outputs: dict): + outputs_without_aux = {k: v for k, v in outputs.items() if k != 'aux_outputs'} + + gt_instances_i = self.gt_instances[self._current_frame_idx] # gt instances of i-th image. + track_instances: Instances = outputs_without_aux['track_instances'] + pred_logits_i = track_instances.pred_logits # predicted logits of i-th image. 
+ pred_boxes_i = track_instances.pred_boxes # predicted boxes of i-th image. + + obj_idxes = gt_instances_i.obj_ids + obj_idxes_list = obj_idxes.detach().cpu().numpy().tolist() + obj_idx_to_gt_idx = {obj_idx: gt_idx for gt_idx, obj_idx in enumerate(obj_idxes_list)} + outputs_i = { + 'pred_logits': pred_logits_i.unsqueeze(0), + 'pred_boxes': pred_boxes_i.unsqueeze(0), + } + + # step1. inherit and update the previous tracks. + num_disappear_track = 0 + for j in range(len(track_instances)): + obj_id = track_instances.obj_idxes[j].item() + # set new target idx. + if obj_id >= 0: + if obj_id in obj_idx_to_gt_idx: + track_instances.matched_gt_idxes[j] = obj_idx_to_gt_idx[obj_id] + else: + num_disappear_track += 1 + track_instances.matched_gt_idxes[j] = -1 # track-disappear case. + else: + track_instances.matched_gt_idxes[j] = -1 + + full_track_idxes = torch.arange(len(track_instances), dtype=torch.long).to(pred_logits_i.device) + matched_track_idxes = (track_instances.obj_idxes >= 0) # occu + prev_matched_indices = torch.stack( + [full_track_idxes[matched_track_idxes], track_instances.matched_gt_idxes[matched_track_idxes]], dim=1).to( + pred_logits_i.device) + + # step2. select the unmatched slots. + # note that the FP tracks whose obj_idxes are -2 will not be selected here. + unmatched_track_idxes = full_track_idxes[track_instances.obj_idxes == -1] + + # step3. select the untracked gt instances (new tracks). + tgt_indexes = track_instances.matched_gt_idxes + tgt_indexes = tgt_indexes[tgt_indexes != -1] + + tgt_state = torch.zeros(len(gt_instances_i)).to(pred_logits_i.device) + tgt_state[tgt_indexes] = 1 + untracked_tgt_indexes = torch.arange(len(gt_instances_i)).to(pred_logits_i.device)[tgt_state == 0] + # untracked_tgt_indexes = select_unmatched_indexes(tgt_indexes, len(gt_instances_i)) + untracked_gt_instances = gt_instances_i[untracked_tgt_indexes] + + def match_for_single_decoder_layer(unmatched_outputs, matcher): + new_track_indices = matcher(unmatched_outputs, + [untracked_gt_instances]) # list[tuple(src_idx, tgt_idx)] + + src_idx = new_track_indices[0][0] + tgt_idx = new_track_indices[0][1] + # concat src and tgt. + new_matched_indices = torch.stack([unmatched_track_idxes[src_idx], untracked_tgt_indexes[tgt_idx]], + dim=1).to(pred_logits_i.device) + return new_matched_indices + + # step4. do matching between the unmatched slots and GTs. + unmatched_outputs = { + 'pred_logits': track_instances.pred_logits[unmatched_track_idxes].unsqueeze(0), + 'pred_boxes': track_instances.pred_boxes[unmatched_track_idxes].unsqueeze(0), + } + new_matched_indices = match_for_single_decoder_layer(unmatched_outputs, self.matcher) + + # step5. update obj_idxes according to the new matching result. + track_instances.obj_idxes[new_matched_indices[:, 0]] = gt_instances_i.obj_ids[new_matched_indices[:, 1]].long() + track_instances.matched_gt_idxes[new_matched_indices[:, 0]] = new_matched_indices[:, 1] + + # step6. calculate iou. + active_idxes = (track_instances.obj_idxes >= 0) & (track_instances.matched_gt_idxes >= 0) + active_track_boxes = track_instances.pred_boxes[active_idxes] + if len(active_track_boxes) > 0: + gt_boxes = gt_instances_i.boxes[track_instances.matched_gt_idxes[active_idxes]] + active_track_boxes = box_ops.box_cxcywh_to_xyxy(active_track_boxes) + gt_boxes = box_ops.box_cxcywh_to_xyxy(gt_boxes) + track_instances.iou[active_idxes] = matched_boxlist_iou(Boxes(active_track_boxes), Boxes(gt_boxes)) + + # step7. merge the unmatched pairs and the matched pairs. 
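+ # prev_matched_indices: tracks that already carried an identity from earlier frames (step1); new_matched_indices: query slots newly bound to so-far untracked GT boxes (step4/5).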
+ matched_indices = torch.cat([new_matched_indices, prev_matched_indices], dim=0) + + # step8. calculate losses. + self.num_samples += len(gt_instances_i) + num_disappear_track + self.sample_device = pred_logits_i.device + for loss in self.losses: + new_track_loss = self.get_loss(loss, + outputs=outputs_i, + gt_instances=[gt_instances_i], + indices=[(matched_indices[:, 0], matched_indices[:, 1])], + num_boxes=1) + self.losses_dict.update( + {'frame_{}_{}'.format(self._current_frame_idx, key): value for key, value in new_track_loss.items()}) + + if 'aux_outputs' in outputs: + for i, aux_outputs in enumerate(outputs['aux_outputs']): + unmatched_outputs_layer = { + 'pred_logits': aux_outputs['pred_logits'][0, unmatched_track_idxes].unsqueeze(0), + 'pred_boxes': aux_outputs['pred_boxes'][0, unmatched_track_idxes].unsqueeze(0), + } + new_matched_indices_layer = match_for_single_decoder_layer(unmatched_outputs_layer, self.matcher) + matched_indices_layer = torch.cat([new_matched_indices_layer, prev_matched_indices], dim=0) + for loss in self.losses: + if loss == 'masks': + # Intermediate masks losses are too costly to compute, we ignore them. + continue + l_dict = self.get_loss(loss, + aux_outputs, + gt_instances=[gt_instances_i], + indices=[(matched_indices_layer[:, 0], matched_indices_layer[:, 1])], + num_boxes=1, ) + self.losses_dict.update( + {'frame_{}_aux{}_{}'.format(self._current_frame_idx, i, key): value for key, value in + l_dict.items()}) + self._step() + return track_instances + + def forward(self, outputs, input_data: dict): + # losses of each frame are calculated during the model's forwarding and are outputted by the model as outputs['losses_dict]. + losses = outputs.pop("losses_dict") + num_samples = self.get_num_boxes(self.num_samples) + for loss_name, loss in losses.items(): + losses[loss_name] /= num_samples + return losses + + +class RuntimeTrackerBase(object): + def __init__(self, score_thresh=0.8, filter_score_thresh=0.6, miss_tolerance=5): + self.score_thresh = score_thresh + self.filter_score_thresh = filter_score_thresh + self.miss_tolerance = miss_tolerance + self.max_obj_id = 0 + + def clear(self): + self.max_obj_id = 0 + + def update(self, track_instances: Instances): + track_instances.disappear_time[track_instances.scores >= self.score_thresh] = 0 + for i in range(len(track_instances)): + if track_instances.obj_idxes[i] == -1 and track_instances.scores[i] >= self.score_thresh: + # print("track {} has score {}, assign obj_id {}".format(i, track_instances.scores[i], self.max_obj_id)) + track_instances.obj_idxes[i] = self.max_obj_id + self.max_obj_id += 1 + elif track_instances.obj_idxes[i] >= 0 and track_instances.scores[i] < self.filter_score_thresh: + track_instances.disappear_time[i] += 1 + if track_instances.disappear_time[i] >= self.miss_tolerance: + # Set the obj_id to -1. + # Then this track will be removed by TrackEmbeddingLayer. 
+ track_instances.obj_idxes[i] = -1 + + +class TrackerPostProcess(nn.Module): + """ This module converts the model's output into the format expected by the coco api""" + def __init__(self): + super().__init__() + + @torch.no_grad() + def forward(self, track_instances: Instances, target_size) -> Instances: + """ Perform the computation + Parameters: + outputs: raw outputs of the model + target_sizes: tensor of dimension [batch_size x 2] containing the size of each images of the batch + For evaluation, this must be the original image size (before any data augmentation) + For visualization, this should be the image size after data augment, but before padding + """ + out_logits = track_instances.pred_logits + out_bbox = track_instances.pred_boxes + + prob = out_logits.sigmoid() + # prob = out_logits[...,:1].sigmoid() + scores, labels = prob.max(-1) + + # convert to [x0, y0, x1, y1] format + boxes = box_ops.box_cxcywh_to_xyxy(out_bbox) + # and from relative [0, 1] to absolute [0, height] coordinates + img_h, img_w = target_size + scale_fct = torch.Tensor([img_w, img_h, img_w, img_h]).to(boxes) + boxes = boxes * scale_fct[None, :] + + track_instances.boxes = boxes + track_instances.scores = scores + track_instances.labels = labels +# track_instances.remove('pred_logits') +# track_instances.remove('pred_boxes') + return track_instances + + +def _get_clones(module, N): + return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) + + +class MOTR(nn.Module): + def __init__(self, backbone, transformer, num_classes, num_queries, num_feature_levels, criterion, track_embed, + aux_loss=True, with_box_refine=False, two_stage=False, memory_bank=None): + """ Initializes the model. + Parameters: + backbone: torch module of the backbone to be used. See backbone.py + transformer: torch module of the transformer architecture. See transformer.py + num_classes: number of object classes + num_queries: number of object queries, ie detection slot. This is the maximal number of objects + DETR can detect in a single image. For COCO, we recommend 100 queries. + aux_loss: True if auxiliary decoding losses (loss at each decoder layer) are to be used. 
+ with_box_refine: iterative bounding box refinement + two_stage: two-stage Deformable DETR + """ + super().__init__() + self.num_queries = num_queries + self.track_embed = track_embed + self.transformer = transformer + hidden_dim = transformer.d_model + self.num_classes = num_classes + self.class_embed = nn.Linear(hidden_dim, num_classes) + self.bbox_embed = MLP(hidden_dim, hidden_dim, 4, 3) + self.num_feature_levels = num_feature_levels + if not two_stage: + self.query_embed = nn.Embedding(num_queries, hidden_dim * 2) + if num_feature_levels > 1: + num_backbone_outs = len(backbone.strides) + input_proj_list = [] + for _ in range(num_backbone_outs): + in_channels = backbone.num_channels[_] + input_proj_list.append(nn.Sequential( + nn.Conv2d(in_channels, hidden_dim, kernel_size=1), + nn.GroupNorm(32, hidden_dim), + )) + for _ in range(num_feature_levels - num_backbone_outs): + input_proj_list.append(nn.Sequential( + nn.Conv2d(in_channels, hidden_dim, kernel_size=3, stride=2, padding=1), + nn.GroupNorm(32, hidden_dim), + )) + in_channels = hidden_dim + self.input_proj = nn.ModuleList(input_proj_list) + else: + self.input_proj = nn.ModuleList([ + nn.Sequential( + nn.Conv2d(backbone.num_channels[0], hidden_dim, kernel_size=1), + nn.GroupNorm(32, hidden_dim), + )]) + self.backbone = backbone + self.aux_loss = aux_loss + self.with_box_refine = with_box_refine + self.two_stage = two_stage + + prior_prob = 0.01 + bias_value = -math.log((1 - prior_prob) / prior_prob) + self.class_embed.bias.data = torch.ones(num_classes) * bias_value + nn.init.constant_(self.bbox_embed.layers[-1].weight.data, 0) + nn.init.constant_(self.bbox_embed.layers[-1].bias.data, 0) + for proj in self.input_proj: + nn.init.xavier_uniform_(proj[0].weight, gain=1) + nn.init.constant_(proj[0].bias, 0) + + # if two-stage, the last class_embed and bbox_embed is for region proposal generation + num_pred = (transformer.decoder.num_layers + 1) if two_stage else transformer.decoder.num_layers + if with_box_refine: + self.class_embed = _get_clones(self.class_embed, num_pred) + self.bbox_embed = _get_clones(self.bbox_embed, num_pred) + nn.init.constant_(self.bbox_embed[0].layers[-1].bias.data[2:], -2.0) + # hack implementation for iterative bounding box refinement + self.transformer.decoder.bbox_embed = self.bbox_embed + else: + nn.init.constant_(self.bbox_embed.layers[-1].bias.data[2:], -2.0) + self.class_embed = nn.ModuleList([self.class_embed for _ in range(num_pred)]) + self.bbox_embed = nn.ModuleList([self.bbox_embed for _ in range(num_pred)]) + self.transformer.decoder.bbox_embed = None + if two_stage: + # hack implementation for two-stage + self.transformer.decoder.class_embed = self.class_embed + for box_embed in self.bbox_embed: + nn.init.constant_(box_embed.layers[-1].bias.data[2:], 0.0) + self.post_process = TrackerPostProcess() + self.track_base = RuntimeTrackerBase() + self.criterion = criterion + self.memory_bank = memory_bank + self.mem_bank_len = 0 if memory_bank is None else memory_bank.max_his_length + + def _generate_empty_tracks(self): + track_instances = Instances((1, 1)) + num_queries, dim = self.query_embed.weight.shape # (300, 512) + device = self.query_embed.weight.device + track_instances.ref_pts = self.transformer.reference_points(self.query_embed.weight[:, :dim // 2]) + track_instances.query_pos = self.query_embed.weight + track_instances.output_embedding = torch.zeros((num_queries, dim >> 1), device=device) + track_instances.obj_idxes = torch.full((len(track_instances),), -1, dtype=torch.long, 
device=device) + track_instances.matched_gt_idxes = torch.full((len(track_instances),), -1, dtype=torch.long, device=device) + track_instances.disappear_time = torch.zeros((len(track_instances), ), dtype=torch.long, device=device) + track_instances.iou = torch.zeros((len(track_instances),), dtype=torch.float, device=device) + track_instances.scores = torch.zeros((len(track_instances),), dtype=torch.float, device=device) + track_instances.track_scores = torch.zeros((len(track_instances),), dtype=torch.float, device=device) + track_instances.pred_boxes = torch.zeros((len(track_instances), 4), dtype=torch.float, device=device) + track_instances.pred_logits = torch.zeros((len(track_instances), self.num_classes), dtype=torch.float, device=device) + + mem_bank_len = self.mem_bank_len + track_instances.mem_bank = torch.zeros((len(track_instances), mem_bank_len, dim // 2), dtype=torch.float32, device=device) + track_instances.mem_padding_mask = torch.ones((len(track_instances), mem_bank_len), dtype=torch.bool, device=device) + track_instances.save_period = torch.zeros((len(track_instances), ), dtype=torch.float32, device=device) + + return track_instances.to(self.query_embed.weight.device) + + def clear(self): + self.track_base.clear() + + @torch.jit.unused + def _set_aux_loss(self, outputs_class, outputs_coord): + # this is a workaround to make torchscript happy, as torchscript + # doesn't support dictionary with non-homogeneous values, such + # as a dict having both a Tensor and a list. + return [{'pred_logits': a, 'pred_boxes': b, } + for a, b in zip(outputs_class[:-1], outputs_coord[:-1])] + + def _forward_single_image(self, samples, track_instances: Instances): + features, pos = self.backbone(samples) + src, mask = features[-1].decompose() + assert mask is not None + + srcs = [] + masks = [] + for l, feat in enumerate(features): + src, mask = feat.decompose() + srcs.append(self.input_proj[l](src)) + masks.append(mask) + assert mask is not None + + if self.num_feature_levels > len(srcs): + _len_srcs = len(srcs) + for l in range(_len_srcs, self.num_feature_levels): + if l == _len_srcs: + src = self.input_proj[l](features[-1].tensors) + else: + src = self.input_proj[l](srcs[-1]) + m = samples.mask + mask = F.interpolate(m[None].float(), size=src.shape[-2:]).to(torch.bool)[0] + pos_l = self.backbone[1](NestedTensor(src, mask)).to(src.dtype) + srcs.append(src) + masks.append(mask) + pos.append(pos_l) + + hs, init_reference, inter_references, enc_outputs_class, enc_outputs_coord_unact = self.transformer(srcs, masks, pos, track_instances.query_pos, ref_pts=track_instances.ref_pts) + + outputs_classes = [] + outputs_coords = [] + for lvl in range(hs.shape[0]): + if lvl == 0: + reference = init_reference + else: + reference = inter_references[lvl - 1] + reference = inverse_sigmoid(reference) + outputs_class = self.class_embed[lvl](hs[lvl]) + tmp = self.bbox_embed[lvl](hs[lvl]) + if reference.shape[-1] == 4: + tmp += reference + else: + assert reference.shape[-1] == 2 + tmp[..., :2] += reference + outputs_coord = tmp.sigmoid() + outputs_classes.append(outputs_class) + outputs_coords.append(outputs_coord) + outputs_class = torch.stack(outputs_classes) + outputs_coord = torch.stack(outputs_coords) + + ref_pts_all = torch.cat([init_reference[None], inter_references[:, :, :, :2]], dim=0) + out = {'pred_logits': outputs_class[-1], 'pred_boxes': outputs_coord[-1], 'ref_pts': ref_pts_all[5]} + if self.aux_loss: + out['aux_outputs'] = self._set_aux_loss(outputs_class, outputs_coord) + + with torch.no_grad(): + 
if self.training: + track_scores = outputs_class[-1, 0, :].sigmoid().max(dim=-1).values + else: + track_scores = outputs_class[-1, 0, :, 0].sigmoid() + + track_instances.scores = track_scores + track_instances.pred_logits = outputs_class[-1, 0] + track_instances.pred_boxes = outputs_coord[-1, 0] + track_instances.output_embedding = hs[-1, 0] + if self.training: + # the track id will be assigned by the matcher. + out['track_instances'] = track_instances + track_instances = self.criterion.match_for_single_frame(out) + else: + # each track will be assigned a unique global id by the track base. + self.track_base.update(track_instances) + if self.memory_bank is not None: + track_instances = self.memory_bank(track_instances) + # track_instances.track_scores = track_instances.track_scores[..., 0] + # track_instances.scores = track_instances.track_scores.sigmoid() + if self.training: + self.criterion.calc_loss_for_track_scores(track_instances) + tmp = {} + tmp['init_track_instances'] = self._generate_empty_tracks() + tmp['track_instances'] = track_instances + out_track_instances = self.track_embed(tmp) + out['track_instances'] = out_track_instances + return out + + @torch.no_grad() + def inference_single_image(self, img, ori_img_size, track_instances=None): + if not isinstance(img, NestedTensor): + img = nested_tensor_from_tensor_list(img) +# if track_instances is None: +# track_instances = self._generate_empty_tracks() + track_instances = self._generate_empty_tracks() + + res = self._forward_single_image(img, track_instances=track_instances) + + track_instances = res['track_instances'] + track_instances = self.post_process(track_instances, ori_img_size) + ret = {'track_instances': track_instances} + if 'ref_pts' in res: + ref_pts = res['ref_pts'] + img_h, img_w = ori_img_size + scale_fct = torch.Tensor([img_w, img_h]).to(ref_pts) + ref_pts = ref_pts * scale_fct[None] + ret['ref_pts'] = ref_pts + return ret + + def forward(self, data: dict): + if self.training: + self.criterion.initialize_for_single_clip(data['gt_instances']) + frames = data['imgs'] # list of Tensor.
+ outputs = { + 'pred_logits': [], + 'pred_boxes': [], + } + + track_instances = self._generate_empty_tracks() + for frame in frames: + if not isinstance(frame, NestedTensor): + frame = nested_tensor_from_tensor_list([frame]) + frame_res = self._forward_single_image(frame, track_instances) + track_instances = frame_res['track_instances'] + outputs['pred_logits'].append(frame_res['pred_logits']) + outputs['pred_boxes'].append(frame_res['pred_boxes']) + + if not self.training: + outputs['track_instances'] = track_instances + else: + outputs['losses_dict'] = self.criterion.losses_dict + return outputs + + +def build(args): + dataset_to_num_classes = { + 'coco': 91, + 'coco_panoptic': 250, + 'e2e_mot': 1, + 'e2e_joint': 1, + 'e2e_static_mot': 1 + } + assert args.dataset_file in dataset_to_num_classes + num_classes = dataset_to_num_classes[args.dataset_file] + device = torch.device(args.device) + + backbone = build_backbone(args) + + transformer = build_deforamble_transformer(args) + d_model = transformer.d_model + hidden_dim = args.dim_feedforward + query_interaction_layer = build_query_interaction_layer(args, args.query_interaction_layer, d_model, hidden_dim, d_model*2) + + img_matcher = build_matcher(args) + num_frames_per_batch = max(args.sampler_lengths) + weight_dict = {} + for i in range(num_frames_per_batch): + weight_dict.update({"frame_{}_loss_ce".format(i): args.cls_loss_coef, + 'frame_{}_loss_bbox'.format(i): args.bbox_loss_coef, + 'frame_{}_loss_giou'.format(i): args.giou_loss_coef, + }) + + # TODO this is a hack + if args.aux_loss: + for i in range(num_frames_per_batch): + for j in range(args.dec_layers - 1): + weight_dict.update({"frame_{}_aux{}_loss_ce".format(i, j): args.cls_loss_coef, + 'frame_{}_aux{}_loss_bbox'.format(i, j): args.bbox_loss_coef, + 'frame_{}_aux{}_loss_giou'.format(i, j): args.giou_loss_coef, + }) + if args.memory_bank_type is not None and len(args.memory_bank_type) > 0: + memory_bank = build_memory_bank(args, d_model, hidden_dim, d_model * 2) + for i in range(num_frames_per_batch): + weight_dict.update({"frame_{}_track_loss_ce".format(i): args.cls_loss_coef}) + else: + memory_bank = None + losses = ['labels', 'boxes'] + criterion = ClipMatcher(num_classes, matcher=img_matcher, weight_dict=weight_dict, losses=losses) + criterion.to(device) + postprocessors = {} + model = MOTR( + backbone, + transformer, + track_embed=query_interaction_layer, + num_feature_levels=args.num_feature_levels, + num_classes=num_classes, + num_queries=args.num_queries, + aux_loss=args.aux_loss, + criterion=criterion, + with_box_refine=args.with_box_refine, + two_stage=args.two_stage, + memory_bank=memory_bank, + ) + return model, criterion, postprocessors diff --git a/tutorials/motr/transforms.py b/tutorials/motr/transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..064d1f057a7084153db597ba9b723a8f2c14f243 --- /dev/null +++ b/tutorials/motr/transforms.py @@ -0,0 +1,650 @@ +# ------------------------------------------------------------------------ +# Copyright (c) 2021 megvii-model. All Rights Reserved. +# ------------------------------------------------------------------------ +# Modified from Deformable DETR (https://github.com/fundamentalvision/Deformable-DETR) +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# ------------------------------------------------------------------------ + +""" +Transforms and data augmentation for both image + bbox. +""" +import copy +import random +import PIL +import torch +import torchvision.transforms as T +import torchvision.transforms.functional as F +from PIL import Image, ImageDraw +from util.box_ops import box_xyxy_to_cxcywh +from util.misc import interpolate +import numpy as np +import os + + + +def crop_mot(image, target, region): + cropped_image = F.crop(image, *region) + + target = target.copy() + i, j, h, w = region + + # should we do something wrt the original size? + target["size"] = torch.tensor([h, w]) + + fields = ["labels", "area", "iscrowd"] + if 'obj_ids' in target: + fields.append('obj_ids') + + if "boxes" in target: + boxes = target["boxes"] + max_size = torch.as_tensor([w, h], dtype=torch.float32) + cropped_boxes = boxes - torch.as_tensor([j, i, j, i]) + + for i, box in enumerate(cropped_boxes): + l, t, r, b = box +# if l < 0: +# l = 0 +# if r < 0: +# r = 0 +# if l > w: +# l = w +# if r > w: +# r = w +# if t < 0: +# t = 0 +# if b < 0: +# b = 0 +# if t > h: +# t = h +# if b > h: +# b = h + if l < 0 and r < 0: + l = r = 0 + if l > w and r > w: + l = r = w + if t < 0 and b < 0: + t = b = 0 + if t > h and b > h: + t = b = h + cropped_boxes[i] = torch.tensor([l, t, r, b], dtype=box.dtype) + + cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size) + cropped_boxes = cropped_boxes.clamp(min=0) + area = (cropped_boxes[:, 1, :] - cropped_boxes[:, 0, :]).prod(dim=1) + target["boxes"] = cropped_boxes.reshape(-1, 4) + target["area"] = area + fields.append("boxes") + + if "masks" in target: + # FIXME should we update the area here if there are no boxes? + target['masks'] = target['masks'][:, i:i + h, j:j + w] + fields.append("masks") + + # remove elements for which the boxes or masks that have zero area + if "boxes" in target or "masks" in target: + # favor boxes selection when defining which elements to keep + # this is compatible with previous implementation + if "boxes" in target: + cropped_boxes = target['boxes'].reshape(-1, 2, 2) + keep = torch.all(cropped_boxes[:, 1, :] > cropped_boxes[:, 0, :], dim=1) + else: + keep = target['masks'].flatten(1).any(1) + + for field in fields: + target[field] = target[field][keep] + + return cropped_image, target + + +def random_shift(image, target, region, sizes): + oh, ow = sizes + # step 1, shift crop and re-scale image firstly + cropped_image = F.crop(image, *region) + cropped_image = F.resize(cropped_image, sizes) + + target = target.copy() + i, j, h, w = region + + # should we do something wrt the original size? 
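+ # (for reference: region = (i, j, h, w) is the shifted crop taken from the source image, and sizes = (oh, ow) is the resolution it is resized back to; the boxes below are rescaled by oh/h and ow/w accordingly.)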
+ target["size"] = torch.tensor([h, w]) + + fields = ["labels", "area", "iscrowd"] + if 'obj_ids' in target: + fields.append('obj_ids') + + if "boxes" in target: + boxes = target["boxes"] + max_size = torch.as_tensor([w, h], dtype=torch.float32) + cropped_boxes = boxes - torch.as_tensor([j, i, j, i]) + + for i, box in enumerate(cropped_boxes): + l, t, r, b = box + if l < 0: + l = 0 + if r < 0: + r = 0 + if l > w: + l = w + if r > w: + r = w + if t < 0: + t = 0 + if b < 0: + b = 0 + if t > h: + t = h + if b > h: + b = h + # step 2, re-scale coords secondly + ratio_h = 1.0 * oh / h + ratio_w = 1.0 * ow / w + cropped_boxes[i] = torch.tensor([ratio_w * l, ratio_h * t, ratio_w * r, ratio_h * b], dtype=box.dtype) + + cropped_boxes = cropped_boxes.reshape(-1, 2, 2) + area = (cropped_boxes[:, 1, :] - cropped_boxes[:, 0, :]).prod(dim=1) + target["boxes"] = cropped_boxes.reshape(-1, 4) + target["area"] = area + fields.append("boxes") + + if "masks" in target: + # FIXME should we update the area here if there are no boxes? + target['masks'] = target['masks'][:, i:i + h, j:j + w] + fields.append("masks") + + # remove elements for which the boxes or masks that have zero area + if "boxes" in target or "masks" in target: + # favor boxes selection when defining which elements to keep + # this is compatible with previous implementation + if "boxes" in target: + cropped_boxes = target['boxes'].reshape(-1, 2, 2) + keep = torch.all(cropped_boxes[:, 1, :] > cropped_boxes[:, 0, :], dim=1) + else: + keep = target['masks'].flatten(1).any(1) + + for field in fields: + target[field] = target[field][keep] + + return cropped_image, target + + +def crop(image, target, region): + cropped_image = F.crop(image, *region) + + target = target.copy() + i, j, h, w = region + + # should we do something wrt the original size? + target["size"] = torch.tensor([h, w]) + + fields = ["labels", "area", "iscrowd"] + if 'obj_ids' in target: + fields.append('obj_ids') + + if "boxes" in target: + boxes = target["boxes"] + max_size = torch.as_tensor([w, h], dtype=torch.float32) + cropped_boxes = boxes - torch.as_tensor([j, i, j, i]) + cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size) + cropped_boxes = cropped_boxes.clamp(min=0) + + area = (cropped_boxes[:, 1, :] - cropped_boxes[:, 0, :]).prod(dim=1) + target["boxes"] = cropped_boxes.reshape(-1, 4) + target["area"] = area + fields.append("boxes") + + if "masks" in target: + # FIXME should we update the area here if there are no boxes? 
+ target['masks'] = target['masks'][:, i:i + h, j:j + w] + fields.append("masks") + + # remove elements for which the boxes or masks that have zero area + if "boxes" in target or "masks" in target: + # favor boxes selection when defining which elements to keep + # this is compatible with previous implementation + if "boxes" in target: + cropped_boxes = target['boxes'].reshape(-1, 2, 2) + keep = torch.all(cropped_boxes[:, 1, :] > cropped_boxes[:, 0, :], dim=1) + else: + keep = target['masks'].flatten(1).any(1) + + for field in fields: + target[field] = target[field][keep] + + return cropped_image, target + + +def hflip(image, target): + flipped_image = F.hflip(image) + + w, h = image.size + + target = target.copy() + if "boxes" in target: + boxes = target["boxes"] + boxes = boxes[:, [2, 1, 0, 3]] * torch.as_tensor([-1, 1, -1, 1]) + torch.as_tensor([w, 0, w, 0]) + target["boxes"] = boxes + + if "masks" in target: + target['masks'] = target['masks'].flip(-1) + + return flipped_image, target + + +def resize(image, target, size, max_size=None): + # size can be min_size (scalar) or (w, h) tuple + + def get_size_with_aspect_ratio(image_size, size, max_size=None): + w, h = image_size + if max_size is not None: + min_original_size = float(min((w, h))) + max_original_size = float(max((w, h))) + if max_original_size / min_original_size * size > max_size: + size = int(round(max_size * min_original_size / max_original_size)) + + if (w <= h and w == size) or (h <= w and h == size): + return (h, w) + + if w < h: + ow = size + oh = int(size * h / w) + else: + oh = size + ow = int(size * w / h) + + return (oh, ow) + + def get_size(image_size, size, max_size=None): + if isinstance(size, (list, tuple)): + return size[::-1] + else: + return get_size_with_aspect_ratio(image_size, size, max_size) + + size = get_size(image.size, size, max_size) + rescaled_image = F.resize(image, size) + + if target is None: + return rescaled_image, None + + ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size)) + ratio_width, ratio_height = ratios + + target = target.copy() + if "boxes" in target: + boxes = target["boxes"] + scaled_boxes = boxes * torch.as_tensor([ratio_width, ratio_height, ratio_width, ratio_height]) + target["boxes"] = scaled_boxes + + if "area" in target: + area = target["area"] + scaled_area = area * (ratio_width * ratio_height) + target["area"] = scaled_area + + h, w = size + target["size"] = torch.tensor([h, w]) + + if "masks" in target: + target['masks'] = interpolate( + target['masks'][:, None].float(), size, mode="nearest")[:, 0] > 0.5 + + return rescaled_image, target + + +def pad(image, target, padding): + # assumes that we only pad on the bottom right corners + padded_image = F.pad(image, (0, 0, padding[0], padding[1])) + if target is None: + return padded_image, None + target = target.copy() + # should we do something wrt the original size? 
+ target["size"] = torch.tensor(padded_image[::-1]) + if "masks" in target: + target['masks'] = torch.nn.functional.pad(target['masks'], (0, padding[0], 0, padding[1])) + return padded_image, target + + +class RandomCrop(object): + def __init__(self, size): + self.size = size + + def __call__(self, img, target): + region = T.RandomCrop.get_params(img, self.size) + return crop(img, target, region) + + +class MotRandomCrop(RandomCrop): + def __call__(self, imgs: list, targets: list): + ret_imgs = [] + ret_targets = [] + region = T.RandomCrop.get_params(imgs[0], self.size) + for img_i, targets_i in zip(imgs, targets): + img_i, targets_i = crop(img_i, targets_i, region) + ret_imgs.append(img_i) + ret_targets.append(targets_i) + return ret_imgs, ret_targets + +class FixedMotRandomCrop(object): + def __init__(self, min_size: int, max_size: int): + self.min_size = min_size + self.max_size = max_size + + def __call__(self, imgs: list, targets: list): + ret_imgs = [] + ret_targets = [] + w = random.randint(self.min_size, min(imgs[0].width, self.max_size)) + h = random.randint(self.min_size, min(imgs[0].height, self.max_size)) + region = T.RandomCrop.get_params(imgs[0], [h, w]) + for img_i, targets_i in zip(imgs, targets): + img_i, targets_i = crop_mot(img_i, targets_i, region) + ret_imgs.append(img_i) + ret_targets.append(targets_i) + return ret_imgs, ret_targets + +class MotRandomShift(object): + def __init__(self, bs=1): + self.bs = bs + + def __call__(self, imgs: list, targets: list): + ret_imgs = copy.deepcopy(imgs) + ret_targets = copy.deepcopy(targets) + + n_frames = len(imgs) + select_i = random.choice(list(range(n_frames))) + w, h = imgs[select_i].size + + xshift = (100 * torch.rand(self.bs)).int() + xshift *= (torch.randn(self.bs) > 0.0).int() * 2 - 1 + yshift = (100 * torch.rand(self.bs)).int() + yshift *= (torch.randn(self.bs) > 0.0).int() * 2 - 1 + ymin = max(0, -yshift[0]) + ymax = min(h, h - yshift[0]) + xmin = max(0, -xshift[0]) + xmax = min(w, w - xshift[0]) + + region = (int(ymin), int(xmin), int(ymax-ymin), int(xmax-xmin)) + ret_imgs[select_i], ret_targets[select_i] = random_shift(imgs[select_i], targets[select_i], region, (h,w)) + + return ret_imgs, ret_targets + + +class FixedMotRandomShift(object): + def __init__(self, bs=1, padding=50): + self.bs = bs + self.padding = padding + + def __call__(self, imgs: list, targets: list): + ret_imgs = [] + ret_targets = [] + + n_frames = len(imgs) + w, h = imgs[0].size + xshift = (self.padding * torch.rand(self.bs)).int() + 1 + xshift *= (torch.randn(self.bs) > 0.0).int() * 2 - 1 + yshift = (self.padding * torch.rand(self.bs)).int() + 1 + yshift *= (torch.randn(self.bs) > 0.0).int() * 2 - 1 + ret_imgs.append(imgs[0]) + ret_targets.append(targets[0]) + for i in range(1, n_frames): + ymin = max(0, -yshift[0]) + ymax = min(h, h - yshift[0]) + xmin = max(0, -xshift[0]) + xmax = min(w, w - xshift[0]) + prev_img = ret_imgs[i-1].copy() + prev_target = copy.deepcopy(ret_targets[i-1]) + region = (int(ymin), int(xmin), int(ymax - ymin), int(xmax - xmin)) + img_i, target_i = random_shift(prev_img, prev_target, region, (h, w)) + ret_imgs.append(img_i) + ret_targets.append(target_i) + + return ret_imgs, ret_targets + + +class RandomSizeCrop(object): + def __init__(self, min_size: int, max_size: int): + self.min_size = min_size + self.max_size = max_size + + def __call__(self, img: PIL.Image.Image, target: dict): + w = random.randint(self.min_size, min(img.width, self.max_size)) + h = random.randint(self.min_size, min(img.height, self.max_size)) + 
region = T.RandomCrop.get_params(img, [h, w]) + return crop(img, target, region) + + +class MotRandomSizeCrop(RandomSizeCrop): + def __call__(self, imgs, targets): + w = random.randint(self.min_size, min(imgs[0].width, self.max_size)) + h = random.randint(self.min_size, min(imgs[0].height, self.max_size)) + region = T.RandomCrop.get_params(imgs[0], [h, w]) + ret_imgs = [] + ret_targets = [] + for img_i, targets_i in zip(imgs, targets): + img_i, targets_i = crop(img_i, targets_i, region) + ret_imgs.append(img_i) + ret_targets.append(targets_i) + return ret_imgs, ret_targets + + +class CenterCrop(object): + def __init__(self, size): + self.size = size + + def __call__(self, img, target): + image_width, image_height = img.size + crop_height, crop_width = self.size + crop_top = int(round((image_height - crop_height) / 2.)) + crop_left = int(round((image_width - crop_width) / 2.)) + return crop(img, target, (crop_top, crop_left, crop_height, crop_width)) + + +class MotCenterCrop(CenterCrop): + def __call__(self, imgs, targets): + image_width, image_height = imgs[0].size + crop_height, crop_width = self.size + crop_top = int(round((image_height - crop_height) / 2.)) + crop_left = int(round((image_width - crop_width) / 2.)) + ret_imgs = [] + ret_targets = [] + for img_i, targets_i in zip(imgs, targets): + img_i, targets_i = crop(img_i, targets_i, (crop_top, crop_left, crop_height, crop_width)) + ret_imgs.append(img_i) + ret_targets.append(targets_i) + return ret_imgs, ret_targets + + +class RandomHorizontalFlip(object): + def __init__(self, p=0.5): + self.p = p + + def __call__(self, img, target): + if random.random() < self.p: + return hflip(img, target) + return img, target + + +class MotRandomHorizontalFlip(RandomHorizontalFlip): + def __call__(self, imgs, targets): + if random.random() < self.p: + ret_imgs = [] + ret_targets = [] + for img_i, targets_i in zip(imgs, targets): + img_i, targets_i = hflip(img_i, targets_i) + ret_imgs.append(img_i) + ret_targets.append(targets_i) + return ret_imgs, ret_targets + return imgs, targets + + +class RandomResize(object): + def __init__(self, sizes, max_size=None): + assert isinstance(sizes, (list, tuple)) + self.sizes = sizes + self.max_size = max_size + + def __call__(self, img, target=None): + size = random.choice(self.sizes) + return resize(img, target, size, self.max_size) + + +class MotRandomResize(RandomResize): + def __call__(self, imgs, targets): + size = random.choice(self.sizes) + ret_imgs = [] + ret_targets = [] + for img_i, targets_i in zip(imgs, targets): + img_i, targets_i = resize(img_i, targets_i, size, self.max_size) + ret_imgs.append(img_i) + ret_targets.append(targets_i) + return ret_imgs, ret_targets + + +class RandomPad(object): + def __init__(self, max_pad): + self.max_pad = max_pad + + def __call__(self, img, target): + pad_x = random.randint(0, self.max_pad) + pad_y = random.randint(0, self.max_pad) + return pad(img, target, (pad_x, pad_y)) + + +class MotRandomPad(RandomPad): + def __call__(self, imgs, targets): + pad_x = random.randint(0, self.max_pad) + pad_y = random.randint(0, self.max_pad) + ret_imgs = [] + ret_targets = [] + for img_i, targets_i in zip(imgs, targets): + img_i, target_i = pad(img_i, targets_i, (pad_x, pad_y)) + ret_imgs.append(img_i) + ret_targets.append(targets_i) + return ret_imgs, ret_targets + + +class RandomSelect(object): + """ + Randomly selects between transforms1 and transforms2, + with probability p for transforms1 and (1 - p) for transforms2 + """ + def __init__(self, transforms1, transforms2, 
p=0.5): + self.transforms1 = transforms1 + self.transforms2 = transforms2 + self.p = p + + def __call__(self, img, target): + if random.random() < self.p: + return self.transforms1(img, target) + return self.transforms2(img, target) + + +class MotRandomSelect(RandomSelect): + """ + Randomly selects between transforms1 and transforms2, + with probability p for transforms1 and (1 - p) for transforms2 + """ + def __call__(self, imgs, targets): + if random.random() < self.p: + return self.transforms1(imgs, targets) + return self.transforms2(imgs, targets) + + +class ToTensor(object): + def __call__(self, img, target): + return F.to_tensor(img), target + + +class MotToTensor(ToTensor): + def __call__(self, imgs, targets): + ret_imgs = [] + for img in imgs: + ret_imgs.append(F.to_tensor(img)) + return ret_imgs, targets + + +class RandomErasing(object): + + def __init__(self, *args, **kwargs): + self.eraser = T.RandomErasing(*args, **kwargs) + + def __call__(self, img, target): + return self.eraser(img), target + + +class MotRandomErasing(RandomErasing): + def __call__(self, imgs, targets): + # TODO: Rewrite this part to ensure the data augmentation is same to each image. + ret_imgs = [] + for img_i, targets_i in zip(imgs, targets): + ret_imgs.append(self.eraser(img_i)) + return ret_imgs, targets + + +class MoTColorJitter(T.ColorJitter): + def __call__(self, imgs, targets): + transform = self.get_params(self.brightness, self.contrast, + self.saturation, self.hue) + ret_imgs = [] + for img_i, targets_i in zip(imgs, targets): + ret_imgs.append(transform(img_i)) + return ret_imgs, targets + + +class Normalize(object): + def __init__(self, mean, std): + self.mean = mean + self.std = std + + def __call__(self, image, target=None): + if target is not None: + target['ori_img'] = image.clone() + image = F.normalize(image, mean=self.mean, std=self.std) + if target is None: + return image, None + target = target.copy() + h, w = image.shape[-2:] + if "boxes" in target: + boxes = target["boxes"] + boxes = box_xyxy_to_cxcywh(boxes) + boxes = boxes / torch.tensor([w, h, w, h], dtype=torch.float32) + target["boxes"] = boxes + return image, target + + +class MotNormalize(Normalize): + def __call__(self, imgs, targets=None): + ret_imgs = [] + ret_targets = [] + for i in range(len(imgs)): + img_i = imgs[i] + targets_i = targets[i] if targets is not None else None + img_i, targets_i = super().__call__(img_i, targets_i) + ret_imgs.append(img_i) + ret_targets.append(targets_i) + return ret_imgs, ret_targets + + +class Compose(object): + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, image, target): + for t in self.transforms: + image, target = t(image, target) + return image, target + + def __repr__(self): + format_string = self.__class__.__name__ + "(" + for t in self.transforms: + format_string += "\n" + format_string += " {0}".format(t) + format_string += "\n)" + return format_string + + +class MotCompose(Compose): + def __call__(self, imgs, targets): + for t in self.transforms: + imgs, targets = t(imgs, targets) + return imgs, targets diff --git a/tutorials/qdtrack/README.md b/tutorials/qdtrack/README.md new file mode 100644 index 0000000000000000000000000000000000000000..47578f2014bdcea0e723584584b9e268f012e1b3 --- /dev/null +++ b/tutorials/qdtrack/README.md @@ -0,0 +1,39 @@ +# QDTrack_reid_motion + +Step1. git clone https://github.com/SysCV/qdtrack.git and train + + +Step2. 
+ +replace https://github.com/SysCV/qdtrack/blob/master/qdtrack/models/mot/qdtrack.py + +add mot_online to https://github.com/SysCV/qdtrack + +add tracker_reid_motion.py to https://github.com/SysCV/qdtrack and rename to tracker.py + +Step3. download qdtrack model trained on mot17 half training set: [google](https://drive.google.com/file/d/1IfM8i0R0lF_4NOgeloMPFo5d52dqhaHW/view?usp=sharing), [baidu(code:whcc)](https://pan.baidu.com/s/1IYRD3V2YOa6-YNFgMQyv7w) + +Step4. run +``` +python3 -m torch.distributed.launch --nproc_per_node=8 --master_port=29501 tools/test.py configs/mot17/qdtrack-frcnn_r50_fpn_4e_mot17.py work_dirs/mot17_half_qdtrack.pth --launcher pytorch --eval track --eval-options resfile_path=output +``` + + +# QDTrack_BYTE + +Step1. git clone https://github.com/SysCV/qdtrack.git and train + + +Step2. + +replace https://github.com/SysCV/qdtrack/blob/master/qdtrack/models/mot/qdtrack.py + +add mot_online to https://github.com/SysCV/qdtrack + +add byte_tracker.py to https://github.com/SysCV/qdtrack + + +Step3. run +``` +python3 -m torch.distributed.launch --nproc_per_node=8 --master_port=29501 tools/test.py configs/mot17/qdtrack-frcnn_r50_fpn_4e_mot17.py work_dirs/mot17_half_qdtrack.pth --launcher pytorch --eval track --eval-options resfile_path=output +``` diff --git a/tutorials/qdtrack/byte_tracker.py b/tutorials/qdtrack/byte_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..0a292293e52327be6b6f2ff8f227c87f1da0527b --- /dev/null +++ b/tutorials/qdtrack/byte_tracker.py @@ -0,0 +1,353 @@ +import numpy as np +from collections import deque +import os +import os.path as osp +import copy +import torch +import torch.nn.functional as F + +from mot_online.kalman_filter import KalmanFilter +from mot_online.basetrack import BaseTrack, TrackState +from mot_online import matching + + + +class STrack(BaseTrack): + shared_kalman = KalmanFilter() + def __init__(self, tlwh, score): + + # wait activate + self._tlwh = np.asarray(tlwh, dtype=np.float) + self.kalman_filter = None + self.mean, self.covariance = None, None + self.is_activated = False + + self.score = score + self.tracklet_len = 0 + + def predict(self): + mean_state = self.mean.copy() + if self.state != TrackState.Tracked: + mean_state[7] = 0 + self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) + + @staticmethod + def multi_predict(stracks): + if len(stracks) > 0: + multi_mean = np.asarray([st.mean.copy() for st in stracks]) + multi_covariance = np.asarray([st.covariance for st in stracks]) + for i, st in enumerate(stracks): + if st.state != TrackState.Tracked: + multi_mean[i][7] = 0 + multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance) + for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): + stracks[i].mean = mean + stracks[i].covariance = cov + + def activate(self, kalman_filter, frame_id): + """Start a new tracklet""" + self.kalman_filter = kalman_filter + self.track_id = self.next_id() + self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) + + self.tracklet_len = 0 + self.state = TrackState.Tracked + if frame_id == 1: + self.is_activated = True + # self.is_activated = True + self.frame_id = frame_id + self.start_frame = frame_id + + def re_activate(self, new_track, frame_id, new_id=False): + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) + ) + self.tracklet_len = 0 + self.state = TrackState.Tracked + 
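+ # Re-activation folds the matched detection into the Kalman state, restarts the
+ # tracklet length and (below) marks the track as activated again; the confidence
+ # score is refreshed from the new detection at the end of this method.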
self.is_activated = True + self.frame_id = frame_id + if new_id: + self.track_id = self.next_id() + self.score = new_track.score + + def update(self, new_track, frame_id): + """ + Update a matched track + :type new_track: STrack + :type frame_id: int + :type update_feature: bool + :return: + """ + self.frame_id = frame_id + self.tracklet_len += 1 + + new_tlwh = new_track.tlwh + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) + self.state = TrackState.Tracked + self.is_activated = True + + self.score = new_track.score + + @property + # @jit(nopython=True) + def tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + """ + if self.mean is None: + return self._tlwh.copy() + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + @property + # @jit(nopython=True) + def tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_xyah(tlwh): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. + """ + ret = np.asarray(tlwh).copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + def to_xyah(self): + return self.tlwh_to_xyah(self.tlwh) + + @staticmethod + # @jit(nopython=True) + def tlbr_to_tlwh(tlbr): + ret = np.asarray(tlbr).copy() + ret[2:] -= ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_tlbr(tlwh): + ret = np.asarray(tlwh).copy() + ret[2:] += ret[:2] + return ret + + def __repr__(self): + return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) + + +class BYTETracker(object): + def __init__(self, frame_rate=30): + self.tracked_stracks = [] # type: list[STrack] + self.lost_stracks = [] # type: list[STrack] + self.removed_stracks = [] # type: list[STrack] + + self.frame_id = 0 + + self.low_thresh = 0.2 + self.track_thresh = 0.8 + self.det_thresh = self.track_thresh + 0.1 + + + self.buffer_size = int(frame_rate / 30.0 * 30) + self.max_time_lost = self.buffer_size + self.kalman_filter = KalmanFilter() + +# def update(self, output_results): + def update(self, det_bboxes, det_labels, frame_id, track_feats=None): + +# self.frame_id += 1 + self.frame_id = frame_id + 1 + activated_starcks = [] + refind_stracks = [] + lost_stracks = [] + removed_stracks = [] + +# scores = output_results[:, 4] +# bboxes = output_results[:, :4] # x1y1x2y2 + scores = det_bboxes[:, 4].cpu().numpy() + bboxes = det_bboxes[:, :4].cpu().numpy() + + remain_inds = scores > self.track_thresh + dets = bboxes[remain_inds] + scores_keep = scores[remain_inds] + + + inds_low = scores > self.low_thresh + inds_high = scores < self.track_thresh + inds_second = np.logical_and(inds_low, inds_high) + dets_second = bboxes[inds_second] + scores_second = scores[inds_second] + + + if len(dets) > 0: + '''Detections''' + detections = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for + (tlbr, s) in zip(dets, scores_keep)] + else: + detections = [] + + ''' Add newly detected tracklets to tracked_stracks''' + unconfirmed = [] + tracked_stracks = [] # type: list[STrack] + for track in self.tracked_stracks: + if not track.is_activated: + unconfirmed.append(track) + else: + tracked_stracks.append(track) + + ''' Step 2: First association, with Kalman and IOU''' + strack_pool = 
joint_stracks(tracked_stracks, self.lost_stracks) + # Predict the current location with KF + STrack.multi_predict(strack_pool) + dists = matching.iou_distance(strack_pool, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.8) + + for itracked, idet in matches: + track = strack_pool[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(detections[idet], self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + ''' Step 3: Second association, with IOU''' + # association the untrack to the low score detections + if len(dets_second) > 0: + '''Detections''' + detections_second = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for + (tlbr, s) in zip(dets_second, scores_second)] + else: + detections_second = [] + r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] + dists = matching.iou_distance(r_tracked_stracks, detections_second) + matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5) + for itracked, idet in matches: + track = r_tracked_stracks[itracked] + det = detections_second[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + for it in u_track: + #track = strack_pool[it] + track = r_tracked_stracks[it] + if not track.state == TrackState.Lost: + track.mark_lost() + lost_stracks.append(track) + + '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' + detections = [detections[i] for i in u_detection] + dists = matching.iou_distance(unconfirmed, detections) + matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) + for itracked, idet in matches: + unconfirmed[itracked].update(detections[idet], self.frame_id) + activated_starcks.append(unconfirmed[itracked]) + for it in u_unconfirmed: + track = unconfirmed[it] + track.mark_removed() + removed_stracks.append(track) + + """ Step 4: Init new stracks""" + for inew in u_detection: + track = detections[inew] + if track.score < self.det_thresh: + continue + track.activate(self.kalman_filter, self.frame_id) + activated_starcks.append(track) + """ Step 5: Update state""" + for track in self.lost_stracks: + if self.frame_id - track.end_frame > self.max_time_lost: + track.mark_removed() + removed_stracks.append(track) + + # print('Ramained match {} s'.format(t4-t3)) + + self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] + self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) + self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) + self.lost_stracks.extend(lost_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) + self.removed_stracks.extend(removed_stracks) + self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) + # get scores of lost tracks + output_stracks = [track for track in self.tracked_stracks if track.is_activated] + +# return output_stracks + + bboxes = [] + labels = [] + ids = [] + for track in output_stracks: + if track.is_activated: + track_bbox = track.tlbr + bboxes.append([track_bbox[0], track_bbox[1], track_bbox[2], track_bbox[3], track.score]) + 
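+ # The label is hard-coded to 0 below because these tutorials track a single
+ # (pedestrian) class; each output row is [x1, y1, x2, y2, score].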
labels.append(0) + ids.append(track.track_id) + return torch.tensor(bboxes), torch.tensor(labels), torch.tensor(ids) + +def joint_stracks(tlista, tlistb): + exists = {} + res = [] + for t in tlista: + exists[t.track_id] = 1 + res.append(t) + for t in tlistb: + tid = t.track_id + if not exists.get(tid, 0): + exists[tid] = 1 + res.append(t) + return res + + +def sub_stracks(tlista, tlistb): + stracks = {} + for t in tlista: + stracks[t.track_id] = t + for t in tlistb: + tid = t.track_id + if stracks.get(tid, 0): + del stracks[tid] + return list(stracks.values()) + + +def remove_duplicate_stracks(stracksa, stracksb): + pdist = matching.iou_distance(stracksa, stracksb) + pairs = np.where(pdist < 0.15) + dupa, dupb = list(), list() + for p, q in zip(*pairs): + timep = stracksa[p].frame_id - stracksa[p].start_frame + timeq = stracksb[q].frame_id - stracksb[q].start_frame + if timep > timeq: + dupb.append(q) + else: + dupa.append(p) + resa = [t for i, t in enumerate(stracksa) if not i in dupa] + resb = [t for i, t in enumerate(stracksb) if not i in dupb] + return resa, resb + + +def remove_fp_stracks(stracksa, n_frame=10): + remain = [] + for t in stracksa: + score_5 = t.score_list[-n_frame:] + score_5 = np.array(score_5, dtype=np.float32) + index = score_5 < 0.45 + num = np.sum(index) + if num < n_frame: + remain.append(t) + return remain diff --git a/tutorials/qdtrack/mot_online/basetrack.py b/tutorials/qdtrack/mot_online/basetrack.py new file mode 100644 index 0000000000000000000000000000000000000000..4fe2233607f6d4ed28b11a0ae6c0303c8ca19098 --- /dev/null +++ b/tutorials/qdtrack/mot_online/basetrack.py @@ -0,0 +1,52 @@ +import numpy as np +from collections import OrderedDict + + +class TrackState(object): + New = 0 + Tracked = 1 + Lost = 2 + Removed = 3 + + +class BaseTrack(object): + _count = 0 + + track_id = 0 + is_activated = False + state = TrackState.New + + history = OrderedDict() + features = [] + curr_feature = None + score = 0 + start_frame = 0 + frame_id = 0 + time_since_update = 0 + + # multi-camera + location = (np.inf, np.inf) + + @property + def end_frame(self): + return self.frame_id + + @staticmethod + def next_id(): + BaseTrack._count += 1 + return BaseTrack._count + + def activate(self, *args): + raise NotImplementedError + + def predict(self): + raise NotImplementedError + + def update(self, *args, **kwargs): + raise NotImplementedError + + def mark_lost(self): + self.state = TrackState.Lost + + def mark_removed(self): + self.state = TrackState.Removed diff --git a/tutorials/qdtrack/mot_online/kalman_filter.py b/tutorials/qdtrack/mot_online/kalman_filter.py new file mode 100644 index 0000000000000000000000000000000000000000..b4c4e9854d8abd2fea75ad6b1fe8cd6846c43680 --- /dev/null +++ b/tutorials/qdtrack/mot_online/kalman_filter.py @@ -0,0 +1,269 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np +import scipy.linalg + +""" +Table for the 0.95 quantile of the chi-square distribution with N degrees of +freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv +function and used as Mahalanobis gating threshold. +""" +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919} + + +class KalmanFilter(object): + """ + A simple Kalman filter for tracking bounding boxes in image space. + + The 8-dimensional state space + + x, y, a, h, vx, vy, va, vh + + contains the bounding box center position (x, y), aspect ratio a, height h, + and their respective velocities. 
+ + Object motion follows a constant velocity model. The bounding box location + (x, y, a, h) is taken as direct observation of the state space (linear + observation model). + + """ + + def __init__(self): + ndim, dt = 4, 1. + + # Create Kalman filter model matrices. + self._motion_mat = np.eye(2 * ndim, 2 * ndim) + for i in range(ndim): + self._motion_mat[i, ndim + i] = dt + self._update_mat = np.eye(ndim, 2 * ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. These weights control the amount of uncertainty in + # the model. This is a bit hacky. + self._std_weight_position = 1. / 20 + self._std_weight_velocity = 1. / 160 + + def initiate(self, measurement): + """Create track from unassociated measurement. + + Parameters + ---------- + measurement : ndarray + Bounding box coordinates (x, y, a, h) with center position (x, y), + aspect ratio a, and height h. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are initialized + to 0 mean. + + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + mean = np.r_[mean_pos, mean_vel] + + std = [ + 2 * self._std_weight_position * measurement[3], + 2 * self._std_weight_position * measurement[3], + 1e-2, + 2 * self._std_weight_position * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 1e-5, + 10 * self._std_weight_velocity * measurement[3]] + covariance = np.diag(np.square(std)) + return mean, covariance + + def predict(self, mean, covariance): + """Run Kalman filter prediction step. + + Parameters + ---------- + mean : ndarray + The 8 dimensional mean vector of the object state at the previous + time step. + covariance : ndarray + The 8x8 dimensional covariance matrix of the object state at the + previous time step. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + + """ + std_pos = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-2, + self._std_weight_position * mean[3]] + std_vel = [ + self._std_weight_velocity * mean[3], + self._std_weight_velocity * mean[3], + 1e-5, + self._std_weight_velocity * mean[3]] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + #mean = np.dot(self._motion_mat, mean) + mean = np.dot(mean, self._motion_mat.T) + covariance = np.linalg.multi_dot(( + self._motion_mat, covariance, self._motion_mat.T)) + motion_cov + + return mean, covariance + + def project(self, mean, covariance): + """Project state distribution to measurement space. + + Parameters + ---------- + mean : ndarray + The state's mean vector (8 dimensional array). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + + Returns + ------- + (ndarray, ndarray) + Returns the projected mean and covariance matrix of the given state + estimate. + + """ + std = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-1, + self._std_weight_position * mean[3]] + innovation_cov = np.diag(np.square(std)) + + mean = np.dot(self._update_mat, mean) + covariance = np.linalg.multi_dot(( + self._update_mat, covariance, self._update_mat.T)) + return mean, covariance + innovation_cov + + def multi_predict(self, mean, covariance): + """Run Kalman filter prediction step (Vectorized version). 
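+ All N tracks are propagated in a single batched product with the shared 8x8
+ transition matrix, instead of calling `predict` once per track.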
+ Parameters + ---------- + mean : ndarray + The Nx8 dimensional mean matrix of the object states at the previous + time step. + covariance : ndarray + The Nx8x8 dimensional covariance matrics of the object states at the + previous time step. + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + """ + std_pos = [ + self._std_weight_position * mean[:, 3], + self._std_weight_position * mean[:, 3], + 1e-2 * np.ones_like(mean[:, 3]), + self._std_weight_position * mean[:, 3]] + std_vel = [ + self._std_weight_velocity * mean[:, 3], + self._std_weight_velocity * mean[:, 3], + 1e-5 * np.ones_like(mean[:, 3]), + self._std_weight_velocity * mean[:, 3]] + sqr = np.square(np.r_[std_pos, std_vel]).T + + motion_cov = [] + for i in range(len(mean)): + motion_cov.append(np.diag(sqr[i])) + motion_cov = np.asarray(motion_cov) + + mean = np.dot(mean, self._motion_mat.T) + left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) + covariance = np.dot(left, self._motion_mat.T) + motion_cov + + return mean, covariance + + def update(self, mean, covariance, measurement): + """Run Kalman filter correction step. + + Parameters + ---------- + mean : ndarray + The predicted state's mean vector (8 dimensional). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + measurement : ndarray + The 4 dimensional measurement vector (x, y, a, h), where (x, y) + is the center position, a the aspect ratio, and h the height of the + bounding box. + + Returns + ------- + (ndarray, ndarray) + Returns the measurement-corrected state distribution. + + """ + projected_mean, projected_cov = self.project(mean, covariance) + + chol_factor, lower = scipy.linalg.cho_factor( + projected_cov, lower=True, check_finite=False) + kalman_gain = scipy.linalg.cho_solve( + (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, + check_finite=False).T + innovation = measurement - projected_mean + + new_mean = mean + np.dot(innovation, kalman_gain.T) + new_covariance = covariance - np.linalg.multi_dot(( + kalman_gain, projected_cov, kalman_gain.T)) + return new_mean, new_covariance + + def gating_distance(self, mean, covariance, measurements, + only_position=False, metric='maha'): + """Compute gating distance between state distribution and measurements. + A suitable distance threshold can be obtained from `chi2inv95`. If + `only_position` is False, the chi-square distribution has 4 degrees of + freedom, otherwise 2. + Parameters + ---------- + mean : ndarray + Mean vector over the state distribution (8 dimensional). + covariance : ndarray + Covariance of the state distribution (8x8 dimensional). + measurements : ndarray + An Nx4 dimensional matrix of N measurements, each in + format (x, y, a, h) where (x, y) is the bounding box center + position, a the aspect ratio, and h the height. + only_position : Optional[bool] + If True, distance computation is done with respect to the bounding + box center position only. + Returns + ------- + ndarray + Returns an array of length N, where the i-th element contains the + squared Mahalanobis distance between (mean, covariance) and + `measurements[i]`. 
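+
+ Notes
+ -----
+ With the default 'maha' metric this is d_i = (z_i - H mu)^T S^{-1} (z_i - H mu),
+ where S = H P H^T + R is the covariance of the projected state; it is evaluated
+ with a Cholesky factorization and a triangular solve rather than an explicit inverse.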
+ """ + mean, covariance = self.project(mean, covariance) + if only_position: + mean, covariance = mean[:2], covariance[:2, :2] + measurements = measurements[:, :2] + + d = measurements - mean + if metric == 'gaussian': + return np.sum(d * d, axis=1) + elif metric == 'maha': + cholesky_factor = np.linalg.cholesky(covariance) + z = scipy.linalg.solve_triangular( + cholesky_factor, d.T, lower=True, check_finite=False, + overwrite_b=True) + squared_maha = np.sum(z * z, axis=0) + return squared_maha + else: + raise ValueError('invalid distance metric') diff --git a/tutorials/qdtrack/mot_online/matching.py b/tutorials/qdtrack/mot_online/matching.py new file mode 100644 index 0000000000000000000000000000000000000000..54cb4be09624cdb68581508bdbdeecdc63539b7c --- /dev/null +++ b/tutorials/qdtrack/mot_online/matching.py @@ -0,0 +1,198 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import lap +import numpy as np +import scipy +from cython_bbox import bbox_overlaps as bbox_ious +from scipy.spatial.distance import cdist + +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919} + +def merge_matches(m1, m2, shape): + O,P,Q = shape + m1 = np.asarray(m1) + m2 = np.asarray(m2) + + M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) + M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) + + mask = M1*M2 + match = mask.nonzero() + match = list(zip(match[0], match[1])) + unmatched_O = tuple(set(range(O)) - set([i for i, j in match])) + unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match])) + + return match, unmatched_O, unmatched_Q + + +def _indices_to_matches(cost_matrix, indices, thresh): + matched_cost = cost_matrix[tuple(zip(*indices))] + matched_mask = (matched_cost <= thresh) + + matches = indices[matched_mask] + unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0])) + unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1])) + + return matches, unmatched_a, unmatched_b + + +def linear_assignment(cost_matrix, thresh): + if cost_matrix.size == 0: + return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) + matches, unmatched_a, unmatched_b = [], [], [] + cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) + for ix, mx in enumerate(x): + if mx >= 0: + matches.append([ix, mx]) + unmatched_a = np.where(x < 0)[0] + unmatched_b = np.where(y < 0)[0] + matches = np.asarray(matches) + return matches, unmatched_a, unmatched_b + + +def ious(atlbrs, btlbrs): + """ + Compute cost based on IoU + :type atlbrs: list[tlbr] | np.ndarray + :type atlbrs: list[tlbr] | np.ndarray + + :rtype ious np.ndarray + """ + ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) + if ious.size == 0: + return ious + + ious = bbox_ious( + np.ascontiguousarray(atlbrs, dtype=np.float), + np.ascontiguousarray(btlbrs, dtype=np.float) + ) + + return ious + + +def iou_distance(atracks, btracks): + """ + Compute cost based on IoU + :type atracks: list[STrack] + :type btracks: list[STrack] + + :rtype cost_matrix np.ndarray + """ + + if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): + atlbrs = atracks + btlbrs = btracks + else: + atlbrs = [track.tlbr for track in atracks] + btlbrs = [track.tlbr for track in btracks] + _ious = ious(atlbrs, btlbrs) + 
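+ # Cost is 1 - IoU, so perfectly overlapping boxes cost 0 and disjoint boxes cost 1;
+ # linear_assignment later minimizes this cost under the given threshold.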
cost_matrix = 1 - _ious + + return cost_matrix + +def embedding_distance(tracks, detections, metric='cosine'): + """ + :param tracks: list[STrack] + :param detections: list[BaseTrack] + :param metric: + :return: cost_matrix np.ndarray + """ + + cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) + if cost_matrix.size == 0: + return cost_matrix + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + #for i, track in enumerate(tracks): + #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + return cost_matrix + +def embedding_distance2(tracks, detections, metric='cosine'): + """ + :param tracks: list[STrack] + :param detections: list[BaseTrack] + :param metric: + :return: cost_matrix np.ndarray + """ + + cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) + if cost_matrix.size == 0: + return cost_matrix + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + #for i, track in enumerate(tracks): + #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + track_features = np.asarray([track.features[0] for track in tracks], dtype=np.float) + cost_matrix2 = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + track_features = np.asarray([track.features[len(track.features)-1] for track in tracks], dtype=np.float) + cost_matrix3 = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + for row in range(len(cost_matrix)): + cost_matrix[row] = (cost_matrix[row]+cost_matrix2[row]+cost_matrix3[row])/3 + return cost_matrix + + +def vis_id_feature_A_distance(tracks, detections, metric='cosine'): + track_features = [] + det_features = [] + leg1 = len(tracks) + leg2 = len(detections) + cost_matrix = np.zeros((leg1, leg2), dtype=np.float) + cost_matrix_det = np.zeros((leg1, leg2), dtype=np.float) + cost_matrix_track = np.zeros((leg1, leg2), dtype=np.float) + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + if leg2 != 0: + cost_matrix_det = np.maximum(0.0, cdist(det_features, det_features, metric)) + if leg1 != 0: + cost_matrix_track = np.maximum(0.0, cdist(track_features, track_features, metric)) + if cost_matrix.size == 0: + return track_features, det_features, cost_matrix, cost_matrix_det, cost_matrix_track + cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) + if leg1 > 10: + leg1 = 10 + tracks = tracks[:10] + if leg2 > 10: + leg2 = 10 + detections = detections[:10] + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + return track_features, det_features, cost_matrix, cost_matrix_det, cost_matrix_track + +def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = chi2inv95[gating_dim] + 
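+ # Pairs whose Mahalanobis gating distance exceeds the 0.95 chi-square quantile
+ # (4 degrees of freedom for full boxes, 2 for position-only gating) are given an
+ # infinite cost and can never be matched.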
measurements = np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position) + cost_matrix[row, gating_distance > gating_threshold] = np.inf + return cost_matrix + + +def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = chi2inv95[gating_dim] + measurements = np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position, metric='maha') + cost_matrix[row, gating_distance > gating_threshold] = np.inf + cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance + return cost_matrix diff --git a/tutorials/qdtrack/qdtrack.py b/tutorials/qdtrack/qdtrack.py new file mode 100644 index 0000000000000000000000000000000000000000..bbecbc04828e8af0bad0c2fa46a88ee76c7c9473 --- /dev/null +++ b/tutorials/qdtrack/qdtrack.py @@ -0,0 +1,164 @@ +import numpy as np +from mmdet.core import bbox2result +from mmdet.models import TwoStageDetector + +from qdtrack.core import track2result +from ..builder import MODELS, build_tracker +from qdtrack.core import imshow_tracks, restore_result +from tracker import BYTETracker + + +@MODELS.register_module() +class QDTrack(TwoStageDetector): + + def __init__(self, tracker=None, freeze_detector=False, *args, **kwargs): + self.prepare_cfg(kwargs) + super().__init__(*args, **kwargs) + self.tracker_cfg = tracker + + self.freeze_detector = freeze_detector + if self.freeze_detector: + self._freeze_detector() + + def _freeze_detector(self): + + self.detector = [ + self.backbone, self.neck, self.rpn_head, self.roi_head.bbox_head + ] + for model in self.detector: + model.eval() + for param in model.parameters(): + param.requires_grad = False + + def prepare_cfg(self, kwargs): + if kwargs.get('train_cfg', False): + kwargs['roi_head']['track_train_cfg'] = kwargs['train_cfg'].get( + 'embed', None) + + def init_tracker(self): +# self.tracker = build_tracker(self.tracker_cfg) + self.tracker = BYTETracker() + + def forward_train(self, + img, + img_metas, + gt_bboxes, + gt_labels, + gt_match_indices, + ref_img, + ref_img_metas, + ref_gt_bboxes, + ref_gt_labels, + ref_gt_match_indices, + gt_bboxes_ignore=None, + gt_masks=None, + ref_gt_bboxes_ignore=None, + ref_gt_masks=None, + **kwargs): + x = self.extract_feat(img) + + losses = dict() + + # RPN forward and loss + proposal_cfg = self.train_cfg.get('rpn_proposal', self.test_cfg.rpn) + rpn_losses, proposal_list = self.rpn_head.forward_train( + x, + img_metas, + gt_bboxes, + gt_labels=None, + gt_bboxes_ignore=gt_bboxes_ignore, + proposal_cfg=proposal_cfg) + losses.update(rpn_losses) + + ref_x = self.extract_feat(ref_img) + ref_proposals = self.rpn_head.simple_test_rpn(ref_x, ref_img_metas) + + roi_losses = self.roi_head.forward_train( + x, img_metas, proposal_list, gt_bboxes, gt_labels, + gt_match_indices, ref_x, ref_img_metas, ref_proposals, + ref_gt_bboxes, ref_gt_labels, gt_bboxes_ignore, gt_masks, + ref_gt_bboxes_ignore, **kwargs) + losses.update(roi_losses) + + return losses + + def simple_test(self, img, img_metas, rescale=False): + # TODO inherit from a base tracker + assert self.roi_head.with_track, 'Track head must be implemented.' 
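+ # A fresh BYTETracker is constructed whenever frame_id == 0, i.e. at the first
+ # frame of each video, so no tracks are carried over between sequences.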
+ frame_id = img_metas[0].get('frame_id', -1) + if frame_id == 0: + self.init_tracker() + + x = self.extract_feat(img) + proposal_list = self.rpn_head.simple_test_rpn(x, img_metas) + det_bboxes, det_labels, track_feats = self.roi_head.simple_test(x, img_metas, proposal_list, rescale) + + bboxes, labels, ids = self.tracker.update(det_bboxes, det_labels, frame_id, track_feats) + +# if track_feats is not None: +# bboxes, labels, ids = self.tracker.match( +# bboxes=det_bboxes, +# labels=det_labels, +# track_feats=track_feats, +# frame_id=frame_id) + + bbox_result = bbox2result(det_bboxes, det_labels, + self.roi_head.bbox_head.num_classes) + + if track_feats is not None: + track_result = track2result(bboxes, labels, ids, + self.roi_head.bbox_head.num_classes) + else: + track_result = [ + np.zeros((0, 6), dtype=np.float32) + for i in range(self.roi_head.bbox_head.num_classes) + ] + return dict(bbox_results=bbox_result, track_results=track_result) + + def show_result(self, + img, + result, + thickness=1, + font_scale=0.5, + show=False, + out_file=None, + wait_time=0, + backend='cv2', + **kwargs): + """Visualize tracking results. + + Args: + img (str | ndarray): Filename of loaded image. + result (dict): Tracking result. + The value of key 'track_results' is ndarray with shape (n, 6) + in [id, tl_x, tl_y, br_x, br_y, score] format. + The value of key 'bbox_results' is ndarray with shape (n, 5) + in [tl_x, tl_y, br_x, br_y, score] format. + thickness (int, optional): Thickness of lines. Defaults to 1. + font_scale (float, optional): Font scales of texts. Defaults + to 0.5. + show (bool, optional): Whether show the visualizations on the + fly. Defaults to False. + out_file (str | None, optional): Output filename. Defaults to None. + backend (str, optional): Backend to draw the bounding boxes, + options are `cv2` and `plt`. Defaults to 'cv2'. + + Returns: + ndarray: Visualized image. 
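+
+ Example (illustrative; assumes `model` is a loaded QDTrack instance and
+ `result` came from its `simple_test`)::
+
+ model.show_result('frame_000001.jpg', result, show=False,
+ out_file='vis/frame_000001.jpg')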
+ """ + assert isinstance(result, dict) + track_result = result.get('track_results', None) + bboxes, labels, ids = restore_result(track_result, return_ids=True) + img = imshow_tracks( + img, + bboxes, + labels, + ids, + classes=self.CLASSES, + thickness=thickness, + font_scale=font_scale, + show=show, + out_file=out_file, + wait_time=wait_time, + backend=backend) + return img diff --git a/tutorials/qdtrack/tracker_reid_motion.py b/tutorials/qdtrack/tracker_reid_motion.py new file mode 100644 index 0000000000000000000000000000000000000000..406a0a413fe5d5682497ea2bef6a1148a8650cb6 --- /dev/null +++ b/tutorials/qdtrack/tracker_reid_motion.py @@ -0,0 +1,397 @@ +import numpy as np +from collections import deque +import os +import os.path as osp +import copy +import torch +import torch.nn.functional as F + +from mot_online.kalman_filter import KalmanFilter +from mot_online.basetrack import BaseTrack, TrackState +from mot_online import matching + + + +class STrack(BaseTrack): + shared_kalman = KalmanFilter() + def __init__(self, tlwh, score, temp_feat, buffer_size=30): + + # wait activate + self._tlwh = np.asarray(tlwh, dtype=np.float) + self.kalman_filter = None + self.mean, self.covariance = None, None + self.is_activated = False + + self.score = score + self.tracklet_len = 0 + + self.smooth_feat = None + self.update_features(temp_feat) + self.features = deque([], maxlen=buffer_size) + self.alpha = 0.9 + + def update_features(self, feat): + feat /= np.linalg.norm(feat) + self.curr_feat = feat + if self.smooth_feat is None: + self.smooth_feat = feat + else: + self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat + self.features.append(feat) + self.smooth_feat /= np.linalg.norm(self.smooth_feat) + + def predict(self): + mean_state = self.mean.copy() + if self.state != TrackState.Tracked: + mean_state[7] = 0 + self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) + + @staticmethod + def multi_predict(stracks): + if len(stracks) > 0: + multi_mean = np.asarray([st.mean.copy() for st in stracks]) + multi_covariance = np.asarray([st.covariance for st in stracks]) + for i, st in enumerate(stracks): + if st.state != TrackState.Tracked: + multi_mean[i][7] = 0 + multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance) + for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): + stracks[i].mean = mean + stracks[i].covariance = cov + + def activate(self, kalman_filter, frame_id): + """Start a new tracklet""" + self.kalman_filter = kalman_filter + self.track_id = self.next_id() + self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) + + self.tracklet_len = 0 + self.state = TrackState.Tracked + if frame_id == 1: + self.is_activated = True + # self.is_activated = True + self.frame_id = frame_id + self.start_frame = frame_id + + def re_activate(self, new_track, frame_id, new_id=False): + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) + ) + + self.update_features(new_track.curr_feat) + self.tracklet_len = 0 + self.state = TrackState.Tracked + self.is_activated = True + self.frame_id = frame_id + if new_id: + self.track_id = self.next_id() + + def update(self, new_track, frame_id, update_feature=True): + """ + Update a matched track + :type new_track: STrack + :type frame_id: int + :type update_feature: bool + :return: + """ + self.frame_id = frame_id + self.tracklet_len += 1 + + new_tlwh = new_track.tlwh + 
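+ # Besides the Kalman/box update, this re-id variant also refreshes the smoothed
+ # appearance embedding via update_features() when update_feature is True.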
self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) + self.state = TrackState.Tracked + self.is_activated = True + + self.score = new_track.score + if update_feature: + self.update_features(new_track.curr_feat) + + @property + # @jit(nopython=True) + def tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + """ + if self.mean is None: + return self._tlwh.copy() + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + @property + # @jit(nopython=True) + def tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_xyah(tlwh): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. + """ + ret = np.asarray(tlwh).copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + def to_xyah(self): + return self.tlwh_to_xyah(self.tlwh) + + @staticmethod + # @jit(nopython=True) + def tlbr_to_tlwh(tlbr): + ret = np.asarray(tlbr).copy() + ret[2:] -= ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_tlbr(tlwh): + ret = np.asarray(tlwh).copy() + ret[2:] += ret[:2] + return ret + + def __repr__(self): + return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) + + +class BYTETracker(object): + def __init__(self, frame_rate=30): + self.tracked_stracks = [] # type: list[STrack] + self.lost_stracks = [] # type: list[STrack] + self.removed_stracks = [] # type: list[STrack] + + self.frame_id = 0 + + self.low_thresh = 0.2 + self.track_thresh = 0.8 + self.det_thresh = self.track_thresh + 0.1 + + + self.buffer_size = int(frame_rate / 30.0 * 30) + self.max_time_lost = self.buffer_size + self.kalman_filter = KalmanFilter() + +# def update(self, output_results): + def update(self, det_bboxes, det_labels, frame_id, track_feats): + +# self.frame_id += 1 + self.frame_id = frame_id + 1 + activated_starcks = [] + refind_stracks = [] + lost_stracks = [] + removed_stracks = [] + +# scores = output_results[:, 4] +# bboxes = output_results[:, :4] # x1y1x2y2 + scores = det_bboxes[:, 4].cpu().numpy() + bboxes = det_bboxes[:, :4].cpu().numpy() + + track_feature = F.normalize(track_feats).cpu().numpy() + + remain_inds = scores > self.track_thresh + dets = bboxes[remain_inds] + scores_keep = scores[remain_inds] + id_feature = track_feature[remain_inds] + + + inds_low = scores > self.low_thresh + inds_high = scores < self.track_thresh + inds_second = np.logical_and(inds_low, inds_high) + dets_second = bboxes[inds_second] + scores_second = scores[inds_second] + id_feature_second = track_feature[inds_second] + + if len(dets) > 0: + '''Detections''' + detections = [STrack(STrack.tlbr_to_tlwh(tlbr), s, f) for + (tlbr, s, f) in zip(dets, scores_keep, id_feature)] + else: + detections = [] + + + ''' Add newly detected tracklets to tracked_stracks''' + unconfirmed = [] + tracked_stracks = [] # type: list[STrack] + for track in self.tracked_stracks: + if not track.is_activated: + unconfirmed.append(track) + else: + tracked_stracks.append(track) + + ''' Step 2: First association, with Kalman and IOU''' + strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) + # Predict the current location with KF + STrack.multi_predict(strack_pool) + + dists = 
matching.embedding_distance(strack_pool, detections) + dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.6) +# dists = matching.iou_distance(strack_pool, detections) +# matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.8) + + for itracked, idet in matches: + track = strack_pool[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(detections[idet], self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + ''' Step 3: Second association, with IOU''' + detections = [detections[i] for i in u_detection] + r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] + dists = matching.iou_distance(r_tracked_stracks, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5) + + for itracked, idet in matches: + track = r_tracked_stracks[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + + ''' Step 3.5: Second association, with IOU''' + # association the untrack to the low score detections + if len(dets_second) > 0: + '''Detections''' + detections_second = [STrack(STrack.tlbr_to_tlwh(tlbr), s, f) for + (tlbr, s, f) in zip(dets_second, scores_second, id_feature_second)] + else: + detections_second = [] + + second_tracked_stracks = [r_tracked_stracks[i] for i in u_track if r_tracked_stracks[i].state == TrackState.Tracked] + dists = matching.iou_distance(second_tracked_stracks, detections_second) + matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5) + for itracked, idet in matches: + track = second_tracked_stracks[itracked] + det = detections_second[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + for it in u_track: + #track = r_tracked_stracks[it] + track = second_tracked_stracks[it] + if not track.state == TrackState.Lost: + track.mark_lost() + lost_stracks.append(track) + + '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' + detections = [detections[i] for i in u_detection] + dists = matching.iou_distance(unconfirmed, detections) + matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) + for itracked, idet in matches: + unconfirmed[itracked].update(detections[idet], self.frame_id) + activated_starcks.append(unconfirmed[itracked]) + for it in u_unconfirmed: + track = unconfirmed[it] + track.mark_removed() + removed_stracks.append(track) + + """ Step 4: Init new stracks""" + for inew in u_detection: + track = detections[inew] + if track.score < self.det_thresh: + continue + track.activate(self.kalman_filter, self.frame_id) + activated_starcks.append(track) + """ Step 5: Update state""" + for track in self.lost_stracks: + if self.frame_id - track.end_frame > self.max_time_lost: + track.mark_removed() + removed_stracks.append(track) + + # print('Ramained match {} s'.format(t4-t3)) + + self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] + self.tracked_stracks = joint_stracks(self.tracked_stracks, 
activated_starcks) + self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) + self.lost_stracks.extend(lost_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) + self.removed_stracks.extend(removed_stracks) + self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) + # get scores of lost tracks + output_stracks = [track for track in self.tracked_stracks if track.is_activated] + +# return output_stracks + + bboxes = [] + labels = [] + ids = [] + for track in output_stracks: + if track.is_activated: + track_bbox = track.tlbr + bboxes.append([track_bbox[0], track_bbox[1], track_bbox[2], track_bbox[3], track.score]) + labels.append(0) + ids.append(track.track_id) + return torch.tensor(bboxes), torch.tensor(labels), torch.tensor(ids) + +def joint_stracks(tlista, tlistb): + exists = {} + res = [] + for t in tlista: + exists[t.track_id] = 1 + res.append(t) + for t in tlistb: + tid = t.track_id + if not exists.get(tid, 0): + exists[tid] = 1 + res.append(t) + return res + + +def sub_stracks(tlista, tlistb): + stracks = {} + for t in tlista: + stracks[t.track_id] = t + for t in tlistb: + tid = t.track_id + if stracks.get(tid, 0): + del stracks[tid] + return list(stracks.values()) + + +def remove_duplicate_stracks(stracksa, stracksb): + pdist = matching.iou_distance(stracksa, stracksb) + pairs = np.where(pdist < 0.15) + dupa, dupb = list(), list() + for p, q in zip(*pairs): + timep = stracksa[p].frame_id - stracksa[p].start_frame + timeq = stracksb[q].frame_id - stracksb[q].start_frame + if timep > timeq: + dupb.append(q) + else: + dupa.append(p) + resa = [t for i, t in enumerate(stracksa) if not i in dupa] + resb = [t for i, t in enumerate(stracksb) if not i in dupb] + return resa, resb + + +def remove_fp_stracks(stracksa, n_frame=10): + remain = [] + for t in stracksa: + score_5 = t.score_list[-n_frame:] + score_5 = np.array(score_5, dtype=np.float32) + index = score_5 < 0.45 + num = np.sum(index) + if num < n_frame: + remain.append(t) + return remain diff --git a/tutorials/trades/README.md b/tutorials/trades/README.md new file mode 100644 index 0000000000000000000000000000000000000000..95afad0195f6230b7ca593dfd088ea7953ff2ed6 --- /dev/null +++ b/tutorials/trades/README.md @@ -0,0 +1,41 @@ +# TraDeS + +Step1. git clone https://github.com/JialianW/TraDeS.git + + +Step2. + +replace https://github.com/JialianW/TraDeS/blob/master/src/lib/utils/tracker.py + +replace https://github.com/JialianW/TraDeS/blob/master/src/lib/opts.py + + +Step3. run +``` +python3 test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --inference --load_model ../models/mot_half.pth --gpus 0 --clip_len 3 --trades --track_thresh 0.4 --new_thresh 0.4 --out_thresh 0.2 --pre_thresh 0.5 +``` + + +# TraDeS_BYTE + +Step1. git clone https://github.com/JialianW/TraDeS.git + + +Step2. + +replace https://github.com/JialianW/TraDeS/blob/master/src/lib/utils/tracker.py by byte_tracker.py + +replace https://github.com/JialianW/TraDeS/blob/master/src/lib/opts.py + +add mot_online to https://github.com/JialianW/TraDeS/blob/master/src/lib/utils + +Step3. 
run +``` +python3 test.py tracking --exp_id mot17_half --dataset mot --dataset_version 17halfval --pre_hm --ltrb_amodal --inference --load_model ../models/mot_half.pth --gpus 0 --clip_len 3 --trades --track_thresh 0.4 --new_thresh 0.5 --out_thresh 0.1 --pre_thresh 0.5 +``` + + +## Notes +tracker.py: motion + reid + +byte_tracker.py: motion with kalman filter diff --git a/tutorials/trades/byte_tracker.py b/tutorials/trades/byte_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..d154045b6c86ceebdf941a5e735b47f4542d7908 --- /dev/null +++ b/tutorials/trades/byte_tracker.py @@ -0,0 +1,352 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +from sklearn.utils.linear_assignment_ import linear_assignment +# from numba import jit +import copy +from .mot_online.kalman_filter import KalmanFilter +from .mot_online.basetrack import BaseTrack, TrackState +from .mot_online import matching + + +class STrack(BaseTrack): + shared_kalman = KalmanFilter() + def __init__(self, tlwh, score): + + # wait activate + self._tlwh = np.asarray(tlwh, dtype=np.float) + self.kalman_filter = None + self.mean, self.covariance = None, None + self.is_activated = False + + self.score = score + self.tracklet_len = 0 + + def predict(self): + mean_state = self.mean.copy() + if self.state != TrackState.Tracked: + mean_state[7] = 0 + self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) + + @staticmethod + def multi_predict(stracks): + if len(stracks) > 0: + multi_mean = np.asarray([st.mean.copy() for st in stracks]) + multi_covariance = np.asarray([st.covariance for st in stracks]) + for i, st in enumerate(stracks): + if st.state != TrackState.Tracked: + multi_mean[i][7] = 0 + multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance) + for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): + stracks[i].mean = mean + stracks[i].covariance = cov + + def activate(self, kalman_filter, frame_id): + """Start a new tracklet""" + self.kalman_filter = kalman_filter + self.track_id = self.next_id() + self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) + + self.tracklet_len = 0 + self.state = TrackState.Tracked + if frame_id == 1: + self.is_activated = True + # self.is_activated = True + self.frame_id = frame_id + self.start_frame = frame_id + + def re_activate(self, new_track, frame_id, new_id=False): + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) + ) + self.tracklet_len = 0 + self.state = TrackState.Tracked + self.is_activated = True + self.frame_id = frame_id + if new_id: + self.track_id = self.next_id() + self.score = new_track.score + + def update(self, new_track, frame_id): + """ + Update a matched track + :type new_track: STrack + :type frame_id: int + :type update_feature: bool + :return: + """ + self.frame_id = frame_id + self.tracklet_len += 1 + + new_tlwh = new_track.tlwh + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) + self.state = TrackState.Tracked + self.is_activated = True + + self.score = new_track.score + + @property + # @jit(nopython=True) + def tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. 
+ """ + if self.mean is None: + return self._tlwh.copy() + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + @property + # @jit(nopython=True) + def tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_xyah(tlwh): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. + """ + ret = np.asarray(tlwh).copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + def to_xyah(self): + return self.tlwh_to_xyah(self.tlwh) + + @staticmethod + # @jit(nopython=True) + def tlbr_to_tlwh(tlbr): + ret = np.asarray(tlbr).copy() + ret[2:] -= ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_tlbr(tlwh): + ret = np.asarray(tlwh).copy() + ret[2:] += ret[:2] + return ret + + def __repr__(self): + return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) + + + +class BYTETracker(object): + def __init__(self, args, frame_rate=30): + self.args = args + self.det_thresh = args.new_thresh + self.buffer_size = int(frame_rate / 30.0 * args.track_buffer) + self.max_time_lost = self.buffer_size + self.reset() + + # below has no effect to final output, just to be compatible to codebase + def init_track(self, results): + for item in results: + if item['score'] > self.opt.new_thresh and item['class'] == 1: + self.id_count += 1 + item['active'] = 1 + item['age'] = 1 + item['tracking_id'] = self.id_count + if not ('ct' in item): + bbox = item['bbox'] + item['ct'] = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] + self.tracks.append(item) + + def reset(self): + self.frame_id = 0 + self.kalman_filter = KalmanFilter() + self.tracked_stracks = [] # type: list[STrack] + self.lost_stracks = [] # type: list[STrack] + self.removed_stracks = [] # type: list[STrack] + self.tracks = [] + + # below has no effect to final output, just to be compatible to codebase + self.id_count = 0 + + def step(self, results, public_det=None): + self.frame_id += 1 + activated_starcks = [] + refind_stracks = [] + lost_stracks = [] + removed_stracks = [] + detections = [] + detections_second = [] + + scores = np.array([item['score'] for item in results if item['class'] == 1], np.float32) + bboxes = np.vstack([item['bbox'] for item in results if item['class'] == 1]) # N x 4, x1y1x2y2 + + remain_inds = scores >= self.args.track_thresh + dets = bboxes[remain_inds] + scores_keep = scores[remain_inds] + + + inds_low = scores > self.args.out_thresh + inds_high = scores < self.args.track_thresh + inds_second = np.logical_and(inds_low, inds_high) + dets_second = bboxes[inds_second] + scores_second = scores[inds_second] + + if len(dets) > 0: + '''Detections''' + detections = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for + (tlbr, s) in zip(dets, scores_keep)] + else: + detections = [] + + ''' Add newly detected tracklets to tracked_stracks''' + unconfirmed = [] + tracked_stracks = [] # type: list[STrack] + for track in self.tracked_stracks: + if not track.is_activated: + unconfirmed.append(track) + else: + tracked_stracks.append(track) + + ''' Step 2: First association, with Kalman and IOU''' + strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) + # Predict the current location with KF + STrack.multi_predict(strack_pool) + dists = matching.iou_distance(strack_pool, detections) + #dists = 
matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.9) + + for itracked, idet in matches: + track = strack_pool[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(detections[idet], self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + ''' Step 3: Second association, association the untrack to the low score detections, with IOU''' + if len(dets_second) > 0: + '''Detections''' + detections_second = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for + (tlbr, s) in zip(dets_second, scores_second)] + else: + detections_second = [] + r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] + dists = matching.iou_distance(r_tracked_stracks, detections_second) + matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.4) + for itracked, idet in matches: + track = r_tracked_stracks[itracked] + det = detections_second[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + for it in u_track: + #track = r_tracked_stracks[it] + track = r_tracked_stracks[it] + if not track.state == TrackState.Lost: + track.mark_lost() + lost_stracks.append(track) + + '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' + detections = [detections[i] for i in u_detection] + dists = matching.iou_distance(unconfirmed, detections) + matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) + for itracked, idet in matches: + unconfirmed[itracked].update(detections[idet], self.frame_id) + activated_starcks.append(unconfirmed[itracked]) + for it in u_unconfirmed: + track = unconfirmed[it] + track.mark_removed() + removed_stracks.append(track) + + """ Step 4: Init new stracks""" + for inew in u_detection: + track = detections[inew] + if track.score < self.det_thresh: + continue + track.activate(self.kalman_filter, self.frame_id) + activated_starcks.append(track) + """ Step 5: Update state""" + for track in self.lost_stracks: + if self.frame_id - track.end_frame > self.max_time_lost: + track.mark_removed() + removed_stracks.append(track) + + self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] + self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) + self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) + self.lost_stracks.extend(lost_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) + self.removed_stracks.extend(removed_stracks) + self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) + output_stracks = [track for track in self.tracked_stracks if track.is_activated] + + ret = [] + for track in output_stracks: + track_dict = {} + track_dict['score'] = track.score + track_dict['bbox'] = track.tlbr + bbox = track_dict['bbox'] + track_dict['ct'] = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] + track_dict['active'] = 1 if track.is_activated else 0 + track_dict['tracking_id'] = track.track_id + track_dict['class'] = 1 + ret.append(track_dict) + + self.tracks = ret + return ret + + +def 
joint_stracks(tlista, tlistb): + exists = {} + res = [] + for t in tlista: + exists[t.track_id] = 1 + res.append(t) + for t in tlistb: + tid = t.track_id + if not exists.get(tid, 0): + exists[tid] = 1 + res.append(t) + return res + + +def sub_stracks(tlista, tlistb): + stracks = {} + for t in tlista: + stracks[t.track_id] = t + for t in tlistb: + tid = t.track_id + if stracks.get(tid, 0): + del stracks[tid] + return list(stracks.values()) + + +def remove_duplicate_stracks(stracksa, stracksb): + pdist = matching.iou_distance(stracksa, stracksb) + pairs = np.where(pdist < 0.15) + dupa, dupb = list(), list() + for p, q in zip(*pairs): + timep = stracksa[p].frame_id - stracksa[p].start_frame + timeq = stracksb[q].frame_id - stracksb[q].start_frame + if timep > timeq: + dupb.append(q) + else: + dupa.append(p) + resa = [t for i, t in enumerate(stracksa) if not i in dupa] + resb = [t for i, t in enumerate(stracksb) if not i in dupb] + return resa, resb diff --git a/tutorials/trades/mot_online/basetrack.py b/tutorials/trades/mot_online/basetrack.py new file mode 100644 index 0000000000000000000000000000000000000000..4fe2233607f6d4ed28b11a0ae6c0303c8ca19098 --- /dev/null +++ b/tutorials/trades/mot_online/basetrack.py @@ -0,0 +1,52 @@ +import numpy as np +from collections import OrderedDict + + +class TrackState(object): + New = 0 + Tracked = 1 + Lost = 2 + Removed = 3 + + +class BaseTrack(object): + _count = 0 + + track_id = 0 + is_activated = False + state = TrackState.New + + history = OrderedDict() + features = [] + curr_feature = None + score = 0 + start_frame = 0 + frame_id = 0 + time_since_update = 0 + + # multi-camera + location = (np.inf, np.inf) + + @property + def end_frame(self): + return self.frame_id + + @staticmethod + def next_id(): + BaseTrack._count += 1 + return BaseTrack._count + + def activate(self, *args): + raise NotImplementedError + + def predict(self): + raise NotImplementedError + + def update(self, *args, **kwargs): + raise NotImplementedError + + def mark_lost(self): + self.state = TrackState.Lost + + def mark_removed(self): + self.state = TrackState.Removed diff --git a/tutorials/trades/mot_online/kalman_filter.py b/tutorials/trades/mot_online/kalman_filter.py new file mode 100644 index 0000000000000000000000000000000000000000..82111a336d4d94bece171f2f95d9147bb7456285 --- /dev/null +++ b/tutorials/trades/mot_online/kalman_filter.py @@ -0,0 +1,252 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np +import scipy.linalg + +""" +Table for the 0.95 quantile of the chi-square distribution with N degrees of +freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv +function and used as Mahalanobis gating threshold. +""" +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919} + + +class KalmanFilter(object): + """ + A simple Kalman filter for tracking bounding boxes in image space. + The 8-dimensional state space + x, y, a, h, vx, vy, va, vh + contains the bounding box center position (x, y), aspect ratio a, height h, + and their respective velocities. + Object motion follows a constant velocity model. The bounding box location + (x, y, a, h) is taken as direct observation of the state space (linear + observation model). + """ + + def __init__(self): + ndim, dt = 4, 1. + + # Create Kalman filter model matrices. 
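+ # The state is the 8-vector [x, y, a, h, vx, vy, va, vh] with a constant
+ # velocity motion model, x_{k+1} = F x_k, where
+ #     F = [[I_4, dt * I_4],
+ #          [0,   I_4     ]]
+ # and only the box itself is observed, z_k = H x_k with H = [I_4, 0].
+ # self._motion_mat below is F and self._update_mat is H.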
+ self._motion_mat = np.eye(2 * ndim, 2 * ndim) + for i in range(ndim): + self._motion_mat[i, ndim + i] = dt + self._update_mat = np.eye(ndim, 2 * ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. These weights control the amount of uncertainty in + # the model. This is a bit hacky. + self._std_weight_position = 1. / 20 + self._std_weight_velocity = 1. / 160 + + def initiate(self, measurement): + """Create track from unassociated measurement. + Parameters + ---------- + measurement : ndarray + Bounding box coordinates (x, y, a, h) with center position (x, y), + aspect ratio a, and height h. + Returns + ------- + (ndarray, ndarray) + Returns the mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are initialized + to 0 mean. + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + mean = np.r_[mean_pos, mean_vel] + + std = [ + 2 * self._std_weight_position * measurement[3], + 2 * self._std_weight_position * measurement[3], + 1e-2, + 2 * self._std_weight_position * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 1e-5, + 10 * self._std_weight_velocity * measurement[3]] + covariance = np.diag(np.square(std)) + return mean, covariance + + def predict(self, mean, covariance): + """Run Kalman filter prediction step. + Parameters + ---------- + mean : ndarray + The 8 dimensional mean vector of the object state at the previous + time step. + covariance : ndarray + The 8x8 dimensional covariance matrix of the object state at the + previous time step. + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + """ + std_pos = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-2, + self._std_weight_position * mean[3]] + std_vel = [ + self._std_weight_velocity * mean[3], + self._std_weight_velocity * mean[3], + 1e-5, + self._std_weight_velocity * mean[3]] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + #mean = np.dot(self._motion_mat, mean) + mean = np.dot(mean, self._motion_mat.T) + covariance = np.linalg.multi_dot(( + self._motion_mat, covariance, self._motion_mat.T)) + motion_cov + + return mean, covariance + + def project(self, mean, covariance): + """Project state distribution to measurement space. + Parameters + ---------- + mean : ndarray + The state's mean vector (8 dimensional array). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + Returns + ------- + (ndarray, ndarray) + Returns the projected mean and covariance matrix of the given state + estimate. + """ + std = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-1, + self._std_weight_position * mean[3]] + innovation_cov = np.diag(np.square(std)) + + mean = np.dot(self._update_mat, mean) + covariance = np.linalg.multi_dot(( + self._update_mat, covariance, self._update_mat.T)) + return mean, covariance + innovation_cov + + def multi_predict(self, mean, covariance): + """Run Kalman filter prediction step (Vectorized version). + Parameters + ---------- + mean : ndarray + The Nx8 dimensional mean matrix of the object states at the previous + time step. + covariance : ndarray + The Nx8x8 dimensional covariance matrics of the object states at the + previous time step. 
+ Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + """ + std_pos = [ + self._std_weight_position * mean[:, 3], + self._std_weight_position * mean[:, 3], + 1e-2 * np.ones_like(mean[:, 3]), + self._std_weight_position * mean[:, 3]] + std_vel = [ + self._std_weight_velocity * mean[:, 3], + self._std_weight_velocity * mean[:, 3], + 1e-5 * np.ones_like(mean[:, 3]), + self._std_weight_velocity * mean[:, 3]] + sqr = np.square(np.r_[std_pos, std_vel]).T + + motion_cov = [] + for i in range(len(mean)): + motion_cov.append(np.diag(sqr[i])) + motion_cov = np.asarray(motion_cov) + + mean = np.dot(mean, self._motion_mat.T) + left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) + covariance = np.dot(left, self._motion_mat.T) + motion_cov + + return mean, covariance + + def update(self, mean, covariance, measurement): + """Run Kalman filter correction step. + Parameters + ---------- + mean : ndarray + The predicted state's mean vector (8 dimensional). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + measurement : ndarray + The 4 dimensional measurement vector (x, y, a, h), where (x, y) + is the center position, a the aspect ratio, and h the height of the + bounding box. + Returns + ------- + (ndarray, ndarray) + Returns the measurement-corrected state distribution. + """ + projected_mean, projected_cov = self.project(mean, covariance) + + chol_factor, lower = scipy.linalg.cho_factor( + projected_cov, lower=True, check_finite=False) + kalman_gain = scipy.linalg.cho_solve( + (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, + check_finite=False).T + innovation = measurement - projected_mean + + new_mean = mean + np.dot(innovation, kalman_gain.T) + new_covariance = covariance - np.linalg.multi_dot(( + kalman_gain, projected_cov, kalman_gain.T)) + return new_mean, new_covariance + + def gating_distance(self, mean, covariance, measurements, + only_position=False, metric='maha'): + """Compute gating distance between state distribution and measurements. + A suitable distance threshold can be obtained from `chi2inv95`. If + `only_position` is False, the chi-square distribution has 4 degrees of + freedom, otherwise 2. + Parameters + ---------- + mean : ndarray + Mean vector over the state distribution (8 dimensional). + covariance : ndarray + Covariance of the state distribution (8x8 dimensional). + measurements : ndarray + An Nx4 dimensional matrix of N measurements, each in + format (x, y, a, h) where (x, y) is the bounding box center + position, a the aspect ratio, and h the height. + only_position : Optional[bool] + If True, distance computation is done with respect to the bounding + box center position only. + Returns + ------- + ndarray + Returns an array of length N, where the i-th element contains the + squared Mahalanobis distance between (mean, covariance) and + `measurements[i]`. 
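+ Notes
+ -----
+ For `metric='maha'` the distance is evaluated in measurement space as
+ d_i^2 = (z_i - H mu)^T S^{-1} (z_i - H mu), where (H mu, S) is the projected
+ state distribution returned by `project`; `metric='gaussian'` falls back to
+ the plain squared Euclidean distance.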
+ """ + mean, covariance = self.project(mean, covariance) + if only_position: + mean, covariance = mean[:2], covariance[:2, :2] + measurements = measurements[:, :2] + + d = measurements - mean + if metric == 'gaussian': + return np.sum(d * d, axis=1) + elif metric == 'maha': + cholesky_factor = np.linalg.cholesky(covariance) + z = scipy.linalg.solve_triangular( + cholesky_factor, d.T, lower=True, check_finite=False, + overwrite_b=True) + squared_maha = np.sum(z * z, axis=0) + return squared_maha + else: + raise ValueError('invalid distance metric') diff --git a/tutorials/trades/mot_online/matching.py b/tutorials/trades/mot_online/matching.py new file mode 100644 index 0000000000000000000000000000000000000000..cc7abab60f86e5e84994071fc0ec0dd2f89c0377 --- /dev/null +++ b/tutorials/trades/mot_online/matching.py @@ -0,0 +1,196 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import lap +import numpy as np +import scipy +from cython_bbox import bbox_overlaps as bbox_ious +from scipy.spatial.distance import cdist + +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919} + +def merge_matches(m1, m2, shape): + O,P,Q = shape + m1 = np.asarray(m1) + m2 = np.asarray(m2) + + M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) + M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) + + mask = M1*M2 + match = mask.nonzero() + match = list(zip(match[0], match[1])) + unmatched_O = tuple(set(range(O)) - set([i for i, j in match])) + unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match])) + + return match, unmatched_O, unmatched_Q + + +def _indices_to_matches(cost_matrix, indices, thresh): + matched_cost = cost_matrix[tuple(zip(*indices))] + matched_mask = (matched_cost <= thresh) + + matches = indices[matched_mask] + unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0])) + unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1])) + + return matches, unmatched_a, unmatched_b + + +def linear_assignment(cost_matrix, thresh): + if cost_matrix.size == 0: + return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) + matches, unmatched_a, unmatched_b = [], [], [] + cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) + for ix, mx in enumerate(x): + if mx >= 0: + matches.append([ix, mx]) + unmatched_a = np.where(x < 0)[0] + unmatched_b = np.where(y < 0)[0] + matches = np.asarray(matches) + return matches, unmatched_a, unmatched_b + + +def ious(atlbrs, btlbrs): + """ + Compute cost based on IoU + :type atlbrs: list[tlbr] | np.ndarray + :type atlbrs: list[tlbr] | np.ndarray + :rtype ious np.ndarray + """ + ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) + if ious.size == 0: + return ious + + ious = bbox_ious( + np.ascontiguousarray(atlbrs, dtype=np.float), + np.ascontiguousarray(btlbrs, dtype=np.float) + ) + + return ious + + +def iou_distance(atracks, btracks): + """ + Compute cost based on IoU + :type atracks: list[STrack] + :type btracks: list[STrack] + :rtype cost_matrix np.ndarray + """ + + if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): + atlbrs = atracks + btlbrs = btracks + else: + atlbrs = [track.tlbr for track in atracks] + btlbrs = [track.tlbr for track in btracks] + _ious = ious(atlbrs, btlbrs) + 
cost_matrix = 1 - _ious + + return cost_matrix + +def embedding_distance(tracks, detections, metric='cosine'): + """ + :param tracks: list[STrack] + :param detections: list[BaseTrack] + :param metric: + :return: cost_matrix np.ndarray + """ + + cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) + if cost_matrix.size == 0: + return cost_matrix + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + #for i, track in enumerate(tracks): + #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + return cost_matrix + +def embedding_distance2(tracks, detections, metric='cosine'): + """ + :param tracks: list[STrack] + :param detections: list[BaseTrack] + :param metric: + :return: cost_matrix np.ndarray + """ + + cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) + if cost_matrix.size == 0: + return cost_matrix + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + #for i, track in enumerate(tracks): + #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + track_features = np.asarray([track.features[0] for track in tracks], dtype=np.float) + cost_matrix2 = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + track_features = np.asarray([track.features[len(track.features)-1] for track in tracks], dtype=np.float) + cost_matrix3 = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + for row in range(len(cost_matrix)): + cost_matrix[row] = (cost_matrix[row]+cost_matrix2[row]+cost_matrix3[row])/3 + return cost_matrix + + +def vis_id_feature_A_distance(tracks, detections, metric='cosine'): + track_features = [] + det_features = [] + leg1 = len(tracks) + leg2 = len(detections) + cost_matrix = np.zeros((leg1, leg2), dtype=np.float) + cost_matrix_det = np.zeros((leg1, leg2), dtype=np.float) + cost_matrix_track = np.zeros((leg1, leg2), dtype=np.float) + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + if leg2 != 0: + cost_matrix_det = np.maximum(0.0, cdist(det_features, det_features, metric)) + if leg1 != 0: + cost_matrix_track = np.maximum(0.0, cdist(track_features, track_features, metric)) + if cost_matrix.size == 0: + return track_features, det_features, cost_matrix, cost_matrix_det, cost_matrix_track + cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) + if leg1 > 10: + leg1 = 10 + tracks = tracks[:10] + if leg2 > 10: + leg2 = 10 + detections = detections[:10] + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + return track_features, det_features, cost_matrix, cost_matrix_det, cost_matrix_track + +def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = chi2inv95[gating_dim] + 
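+ # Gating: any track/detection pair whose squared Mahalanobis distance exceeds
+ # the chi-square 0.95 quantile (4 degrees of freedom, or 2 when only the box
+ # center is used) is assigned an infinite cost and can never be matched.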
measurements = np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position) + cost_matrix[row, gating_distance > gating_threshold] = np.inf + return cost_matrix + + +def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = chi2inv95[gating_dim] + measurements = np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position, metric='maha') + cost_matrix[row, gating_distance > gating_threshold] = np.inf + cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance + return cost_matrix diff --git a/tutorials/trades/opts.py b/tutorials/trades/opts.py new file mode 100644 index 0000000000000000000000000000000000000000..3993c793ac9afd6b666563328cb7be8fdcc871b1 --- /dev/null +++ b/tutorials/trades/opts.py @@ -0,0 +1,439 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import os +import sys +import json + +class opts(object): + def __init__(self): + self.parser = argparse.ArgumentParser() + # basic experiment setting + self.parser.add_argument('task', default='', + help='ctdet | ddd | multi_pose ' + '| tracking or combined with ,') + self.parser.add_argument('--dataset', default='coco', + help='see lib/dataset/dataset_facotry for ' + + 'available datasets') + self.parser.add_argument('--test_dataset', default='', + help='coco | kitti | coco_hp | pascal') + self.parser.add_argument('--exp_id', default='default') + self.parser.add_argument('--test', action='store_true') + self.parser.add_argument('--debug', type=int, default=0, + help='level of visualization.' + '1: only show the final detection results' + '2: show the network output features' + '3: use matplot to display' # useful when lunching training with ipython notebook + '4: save all visualizations to disk') + self.parser.add_argument('--no_pause', action='store_true') + self.parser.add_argument('--demo', default='', + help='path to image/ image folders/ video. ' + 'or "webcam"') + self.parser.add_argument('--load_model', default='', + help='path to pretrained model') + self.parser.add_argument('--resume', action='store_true', + help='resume an experiment. ' + 'Reloaded the optimizer parameter and ' + 'set load_model to model_last.pth ' + 'in the exp dir if load_model is empty.') + + # system + self.parser.add_argument('--gpus', default='0', + help='-1 for CPU, use comma for multiple gpus') + self.parser.add_argument('--num_workers', type=int, default=4, + help='dataloader threads. 
0 for single-thread.') + self.parser.add_argument('--not_cuda_benchmark', action='store_true', + help='disable when the input size is not fixed.') + self.parser.add_argument('--seed', type=int, default=317, + help='random seed') # from CornerNet + self.parser.add_argument('--not_set_cuda_env', action='store_true', + help='used when training in slurm clusters.') + + # log + self.parser.add_argument('--print_iter', type=int, default=0, + help='disable progress bar and print to screen.') + self.parser.add_argument('--save_all', action='store_true', + help='save model to disk every 5 epochs.') + self.parser.add_argument('--vis_thresh', type=float, default=0.3, + help='visualization threshold.') + self.parser.add_argument('--debugger_theme', default='white', + choices=['white', 'black']) + self.parser.add_argument('--eval_val', action='store_true') + self.parser.add_argument('--save_imgs', default='', help='') + self.parser.add_argument('--save_img_suffix', default='', help='') + self.parser.add_argument('--skip_first', type=int, default=-1, help='') + self.parser.add_argument('--save_video', action='store_true') + self.parser.add_argument('--save_framerate', type=int, default=30) + self.parser.add_argument('--resize_video', action='store_true') + self.parser.add_argument('--video_h', type=int, default=512, help='') + self.parser.add_argument('--video_w', type=int, default=512, help='') + self.parser.add_argument('--transpose_video', action='store_true') + self.parser.add_argument('--show_track_color', action='store_true') + self.parser.add_argument('--not_show_bbox', action='store_true') + self.parser.add_argument('--not_show_number', action='store_true') + self.parser.add_argument('--qualitative', action='store_true') + self.parser.add_argument('--tango_color', action='store_true') + + # model + self.parser.add_argument('--arch', default='dla_34', + help='model architecture. Currently tested' + 'res_18 | res_101 | resdcn_18 | resdcn_101 |' + 'dlav0_34 | dla_34 | hourglass') + self.parser.add_argument('--dla_node', default='dcn') + self.parser.add_argument('--head_conv', type=int, default=-1, + help='conv layer channels for output head' + '0 for no conv layer' + '-1 for default setting: ' + '64 for resnets and 256 for dla.') + self.parser.add_argument('--num_head_conv', type=int, default=1) + self.parser.add_argument('--head_kernel', type=int, default=3, help='') + self.parser.add_argument('--down_ratio', type=int, default=4, + help='output stride. 
Currently only supports 4.') + self.parser.add_argument('--not_idaup', action='store_true') + self.parser.add_argument('--num_classes', type=int, default=-1) + self.parser.add_argument('--num_layers', type=int, default=101) + self.parser.add_argument('--backbone', default='dla34') + self.parser.add_argument('--neck', default='dlaup') + self.parser.add_argument('--msra_outchannel', type=int, default=256) + self.parser.add_argument('--efficient_level', type=int, default=0) + self.parser.add_argument('--prior_bias', type=float, default=-4.6) # -2.19 + self.parser.add_argument('--embedding', action='store_true') + self.parser.add_argument('--box_nms', type=float, default=-1) + self.parser.add_argument('--inference', action='store_true') + self.parser.add_argument('--clip_len', type=int, default=1, help='number of images used in trades' + 'including the current image') + self.parser.add_argument('--no_repeat', action='store_true', default=True) + self.parser.add_argument('--seg', action='store_true', default=False) + self.parser.add_argument('--seg_feat_channel', default=8, type=int, help='.') + self.parser.add_argument('--deform_kernel_size', type=int, default=3) + self.parser.add_argument('--trades', action='store_true', help='Track to Detect and Segment:' + 'An Online Multi Object Tracker') + + # input + self.parser.add_argument('--input_res', type=int, default=-1, + help='input height and width. -1 for default from ' + 'dataset. Will be overriden by input_h | input_w') + self.parser.add_argument('--input_h', type=int, default=-1, + help='input height. -1 for default from dataset.') + self.parser.add_argument('--input_w', type=int, default=-1, + help='input width. -1 for default from dataset.') + self.parser.add_argument('--dataset_version', default='') + + # train + self.parser.add_argument('--optim', default='adam') + self.parser.add_argument('--lr', type=float, default=1.25e-4, + help='learning rate for batch size 32.') + self.parser.add_argument('--lr_step', type=str, default='60', + help='drop learning rate by 10.') + self.parser.add_argument('--save_point', type=str, default='90', + help='when to save the model to disk.') + self.parser.add_argument('--num_epochs', type=int, default=70, + help='total training epochs.') + self.parser.add_argument('--batch_size', type=int, default=32, + help='batch size') + self.parser.add_argument('--master_batch_size', type=int, default=-1, + help='batch size on the master gpu.') + self.parser.add_argument('--num_iters', type=int, default=-1, + help='default: #samples / batch_size.') + self.parser.add_argument('--val_intervals', type=int, default=10000, + help='number of epochs to run validation.') + self.parser.add_argument('--trainval', action='store_true', + help='include validation in training and ' + 'test on test set') + self.parser.add_argument('--ltrb', action='store_true', + help='') + self.parser.add_argument('--ltrb_weight', type=float, default=0.1, + help='') + self.parser.add_argument('--reset_hm', action='store_true') + self.parser.add_argument('--reuse_hm', action='store_true') + self.parser.add_argument('--use_kpt_center', action='store_true') + self.parser.add_argument('--add_05', action='store_true') + self.parser.add_argument('--dense_reg', type=int, default=1, help='') + + # test + self.parser.add_argument('--flip_test', action='store_true', + help='flip data augmentation.') + self.parser.add_argument('--test_scales', type=str, default='1', + help='multi scale test augmentation.') + self.parser.add_argument('--nms', 
action='store_true', + help='run nms in testing.') + self.parser.add_argument('--K', type=int, default=100, + help='max number of output objects.') + self.parser.add_argument('--not_prefetch_test', action='store_true', + help='not use parallal data pre-processing.') + self.parser.add_argument('--fix_short', type=int, default=-1) + self.parser.add_argument('--keep_res', action='store_true', + help='keep the original resolution' + ' during validation.') + self.parser.add_argument('--map_argoverse_id', action='store_true', + help='if trained on nuscenes and eval on kitti') + self.parser.add_argument('--out_thresh', type=float, default=-1, + help='') + self.parser.add_argument('--depth_scale', type=float, default=1, + help='') + self.parser.add_argument('--save_results', action='store_true') + self.parser.add_argument('--load_results', default='') + self.parser.add_argument('--use_loaded_results', action='store_true') + self.parser.add_argument('--ignore_loaded_cats', default='') + self.parser.add_argument('--model_output_list', action='store_true', + help='Used when convert to onnx') + self.parser.add_argument('--non_block_test', action='store_true') + self.parser.add_argument('--vis_gt_bev', default='', help='') + self.parser.add_argument('--kitti_split', default='3dop', + help='different validation split for kitti: ' + '3dop | subcnn') + self.parser.add_argument('--test_focal_length', type=int, default=-1) + + # dataset + self.parser.add_argument('--not_rand_crop', action='store_true', + help='not use the random crop data augmentation' + 'from CornerNet.') + self.parser.add_argument('--not_max_crop', action='store_true', + help='used when the training dataset has' + 'inbalanced aspect ratios.') + self.parser.add_argument('--shift', type=float, default=0, + help='when not using random crop, 0.1' + 'apply shift augmentation.') + self.parser.add_argument('--scale', type=float, default=0, + help='when not using random crop, 0.4' + 'apply scale augmentation.') + self.parser.add_argument('--aug_rot', type=float, default=0, + help='probability of applying ' + 'rotation augmentation.') + self.parser.add_argument('--rotate', type=float, default=0, + help='when not using random crop' + 'apply rotation augmentation.') + self.parser.add_argument('--flip', type=float, default=0.5, + help='probability of applying flip augmentation.') + self.parser.add_argument('--no_color_aug', action='store_true', + help='not use the color augmenation ' + 'from CornerNet') + + # Tracking + self.parser.add_argument('--tracking', action='store_true') + self.parser.add_argument('--pre_hm', action='store_true') + self.parser.add_argument('--same_aug_pre', action='store_true') + self.parser.add_argument('--zero_pre_hm', action='store_true') + self.parser.add_argument('--hm_disturb', type=float, default=0) + self.parser.add_argument('--lost_disturb', type=float, default=0) + self.parser.add_argument('--fp_disturb', type=float, default=0) + self.parser.add_argument('--pre_thresh', type=float, default=-1) + self.parser.add_argument('--track_thresh', type=float, default=0.3) + self.parser.add_argument('--match_thresh', type=float, default=0.8) + self.parser.add_argument('--track_buffer', type=int, default=30) + self.parser.add_argument('--new_thresh', type=float, default=0.0) + self.parser.add_argument('--max_frame_dist', type=int, default=3) + self.parser.add_argument('--ltrb_amodal', action='store_true') + self.parser.add_argument('--ltrb_amodal_weight', type=float, default=0.1) + self.parser.add_argument('--window_size', 
type=int, default=20) + self.parser.add_argument('--public_det', action='store_true') + self.parser.add_argument('--no_pre_img', action='store_true') + self.parser.add_argument('--zero_tracking', action='store_true') + self.parser.add_argument('--hungarian', action='store_true') + self.parser.add_argument('--max_age', type=int, default=-1) + + + # loss + self.parser.add_argument('--tracking_weight', type=float, default=1) + self.parser.add_argument('--reg_loss', default='l1', + help='regression loss: sl1 | l1 | l2') + self.parser.add_argument('--hm_weight', type=float, default=1, + help='loss weight for keypoint heatmaps.') + self.parser.add_argument('--off_weight', type=float, default=1, + help='loss weight for keypoint local offsets.') + self.parser.add_argument('--wh_weight', type=float, default=0.1, + help='loss weight for bounding box size.') + self.parser.add_argument('--hp_weight', type=float, default=1, + help='loss weight for human pose offset.') + self.parser.add_argument('--hm_hp_weight', type=float, default=1, + help='loss weight for human keypoint heatmap.') + self.parser.add_argument('--amodel_offset_weight', type=float, default=1, + help='Please forgive the typo.') + self.parser.add_argument('--dep_weight', type=float, default=1, + help='loss weight for depth.') + self.parser.add_argument('--dim_weight', type=float, default=1, + help='loss weight for 3d bounding box size.') + self.parser.add_argument('--rot_weight', type=float, default=1, + help='loss weight for orientation.') + self.parser.add_argument('--nuscenes_att', action='store_true') + self.parser.add_argument('--nuscenes_att_weight', type=float, default=1) + self.parser.add_argument('--velocity', action='store_true') + self.parser.add_argument('--velocity_weight', type=float, default=1) + self.parser.add_argument('--nID', type=int, default=-1) + + # custom dataset + self.parser.add_argument('--custom_dataset_img_path', default='') + self.parser.add_argument('--custom_dataset_ann_path', default='') + + def parse(self, args=''): + if args == '': + opt = self.parser.parse_args() + else: + opt = self.parser.parse_args(args) + + if opt.test_dataset == '': + opt.test_dataset = opt.dataset + + opt.gpus_str = opt.gpus + opt.gpus = [int(gpu) for gpu in opt.gpus.split(',')] + opt.gpus = [i for i in range(len(opt.gpus))] if opt.gpus[0] >=0 else [-1] + opt.lr_step = [int(i) for i in opt.lr_step.split(',')] + opt.save_point = [int(i) for i in opt.save_point.split(',')] + opt.test_scales = [float(i) for i in opt.test_scales.split(',')] + opt.save_imgs = [i for i in opt.save_imgs.split(',')] \ + if opt.save_imgs != '' else [] + opt.ignore_loaded_cats = \ + [int(i) for i in opt.ignore_loaded_cats.split(',')] \ + if opt.ignore_loaded_cats != '' else [] + + opt.num_workers = max(opt.num_workers, 2 * len(opt.gpus)) + opt.pre_img = False + if 'tracking' in opt.task: + print('Running tracking') + opt.tracking = True +# opt.out_thresh = max(opt.track_thresh, opt.out_thresh) +# opt.pre_thresh = max(opt.track_thresh, opt.pre_thresh) +# opt.new_thresh = max(opt.track_thresh, opt.new_thresh) + opt.pre_img = not opt.no_pre_img + print('Using tracking threshold for out threshold!', opt.track_thresh) + # if 'ddd' in opt.task: + opt.show_track_color = True + if opt.dataset in ['mot', 'mots', 'youtube_vis']: + opt.overlap_thresh = 0.05 + elif opt.dataset == 'nuscenes': + opt.window_size = 7 + opt.overlap_thresh = -1 + else: + opt.overlap_thresh = 0.05 + + opt.fix_res = not opt.keep_res + print('Fix size testing.' 
if opt.fix_res else 'Keep resolution testing.') + + if opt.head_conv == -1: # init default head_conv + opt.head_conv = 256 if 'dla' in opt.arch else 64 + + opt.pad = 127 if 'hourglass' in opt.arch else 31 + opt.num_stacks = 2 if opt.arch == 'hourglass' else 1 + + if opt.master_batch_size == -1: + opt.master_batch_size = opt.batch_size // len(opt.gpus) + rest_batch_size = (opt.batch_size - opt.master_batch_size) + opt.chunk_sizes = [opt.master_batch_size] + for i in range(len(opt.gpus) - 1): + slave_chunk_size = rest_batch_size // (len(opt.gpus) - 1) + if i < rest_batch_size % (len(opt.gpus) - 1): + slave_chunk_size += 1 + opt.chunk_sizes.append(slave_chunk_size) + print('training chunk_sizes:', opt.chunk_sizes) + + if opt.debug > 0: + opt.num_workers = 0 + opt.batch_size = 1 + opt.gpus = [opt.gpus[0]] + opt.master_batch_size = -1 + + # log dirs + opt.root_dir = os.path.join(os.path.dirname(__file__), '..', '..') + opt.data_dir = os.path.join(opt.root_dir, 'data') + opt.exp_dir = os.path.join(opt.root_dir, 'exp', opt.task) + opt.save_dir = os.path.join(opt.exp_dir, opt.exp_id) + opt.debug_dir = os.path.join(opt.save_dir, 'debug') + + if opt.resume and opt.load_model == '': + opt.load_model = os.path.join(opt.save_dir, 'model_last.pth') + return opt + + + def update_dataset_info_and_set_heads(self, opt, dataset): + opt.num_classes = dataset.num_categories \ + if opt.num_classes < 0 else opt.num_classes + # input_h(w): opt.input_h overrides opt.input_res overrides dataset default + input_h, input_w = dataset.default_resolution + input_h = opt.input_res if opt.input_res > 0 else input_h + input_w = opt.input_res if opt.input_res > 0 else input_w + opt.input_h = opt.input_h if opt.input_h > 0 else input_h + opt.input_w = opt.input_w if opt.input_w > 0 else input_w + opt.output_h = opt.input_h // opt.down_ratio + opt.output_w = opt.input_w // opt.down_ratio + opt.input_res = max(opt.input_h, opt.input_w) + opt.output_res = max(opt.output_h, opt.output_w) + + opt.heads = {'hm': opt.num_classes, 'reg': 2, 'wh': 2} + + if not opt.trades: + if 'tracking' in opt.task: + opt.heads.update({'tracking': 2}) + + if 'ddd' in opt.task: + opt.heads.update({'dep': 1, 'rot': 8, 'dim': 3, 'amodel_offset': 2}) + + if 'multi_pose' in opt.task: + opt.heads.update({ + 'hps': dataset.num_joints * 2, 'hm_hp': dataset.num_joints, + 'hp_offset': 2}) + + if opt.ltrb: + opt.heads.update({'ltrb': 4}) + if opt.ltrb_amodal: + opt.heads.update({'ltrb_amodal': 4}) + if opt.nuscenes_att: + opt.heads.update({'nuscenes_att': 8}) + if opt.velocity: + opt.heads.update({'velocity': 3}) + + if opt.embedding: + opt.heads.update({'embedding': 128}) + if opt.seg: + opt.heads.update({'conv_weight': 2*opt.seg_feat_channel**2 + 5*opt.seg_feat_channel + 1}) + opt.heads.update({'seg_feat': opt.seg_feat_channel}) + weight_dict = {'hm': opt.hm_weight, 'wh': opt.wh_weight, + 'reg': opt.off_weight, 'hps': opt.hp_weight, + 'hm_hp': opt.hm_hp_weight, 'hp_offset': opt.off_weight, + 'dep': opt.dep_weight, 'rot': opt.rot_weight, + 'dim': opt.dim_weight, + 'amodel_offset': opt.amodel_offset_weight, + 'ltrb': opt.ltrb_weight, + 'tracking': opt.tracking_weight, + 'ltrb_amodal': opt.ltrb_amodal_weight, + 'nuscenes_att': opt.nuscenes_att_weight, + 'velocity': opt.velocity_weight, + 'embedding': 1.0, + 'conv_weight': 1.0, + 'seg_feat':1.0} + opt.weights = {head: weight_dict[head] for head in opt.heads} + if opt.trades: + opt.weights['cost_volume'] = 1.0 + if opt.seg: + opt.weights['mask_loss'] = 1.0 + for head in opt.weights: + if opt.weights[head] == 0: 
+ del opt.heads[head] + opt.head_conv = {head: [opt.head_conv \ + for i in range(opt.num_head_conv if head != 'reg' else 1)] for head in opt.heads} + + print('input h w:', opt.input_h, opt.input_w) + print('heads', opt.heads) + print('weights', opt.weights) + print('head conv', opt.head_conv) + + return opt + + def init(self, args=''): + # only used in demo + default_dataset_info = { + 'ctdet': 'coco', 'multi_pose': 'coco_hp', 'ddd': 'nuscenes', + 'tracking,ctdet': 'coco', 'tracking,multi_pose': 'coco_hp', + 'tracking,ddd': 'nuscenes' + } + opt = self.parse() + from dataset.dataset_factory import dataset_factory + train_dataset = default_dataset_info[opt.task] \ + if opt.task in default_dataset_info else 'coco' + if opt.dataset != 'coco': + dataset = dataset_factory[opt.dataset] + else: + dataset = dataset_factory[train_dataset] + opt = self.update_dataset_info_and_set_heads(opt, dataset) + return opt diff --git a/tutorials/trades/tracker.py b/tutorials/trades/tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..a607935cc335d48e784448f74a040175890a23f4 --- /dev/null +++ b/tutorials/trades/tracker.py @@ -0,0 +1,299 @@ +import numpy as np +from sklearn.utils.linear_assignment_ import linear_assignment +import copy +from sklearn.metrics.pairwise import cosine_similarity as cosine + + +class Tracker(object): + def __init__(self, opt): + self.opt = opt + self.reset() + self.nID = 10000 + self.alpha = 0.1 + + def init_track(self, results): + for item in results: + if item['score'] > self.opt.new_thresh: + self.id_count += 1 + # active and age are never used in the paper + item['active'] = 1 + item['age'] = 1 + item['tracking_id'] = self.id_count + if not ('ct' in item): + bbox = item['bbox'] + item['ct'] = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2] + self.tracks.append(item) + self.nID = 10000 + self.embedding_bank = np.zeros((self.nID, 128)) + self.cat_bank = np.zeros((self.nID), dtype=np.int) + + def reset(self): + self.id_count = 0 + self.nID = 10000 + self.tracks = [] + self.embedding_bank = np.zeros((self.nID, 128)) + self.cat_bank = np.zeros((self.nID), dtype=np.int) + self.tracklet_ages = np.zeros((self.nID), dtype=np.int) + self.alive = [] + + def step(self, results_with_low, public_det=None): + results = [item for item in results_with_low if item['score'] >= self.opt.track_thresh] + + # first association + N = len(results) + M = len(self.tracks) + self.alive = [] + + track_boxes = np.array([[track['bbox'][0], track['bbox'][1], + track['bbox'][2], track['bbox'][3]] for track in self.tracks], np.float32) # M x 4 + det_boxes = np.array([[item['bbox'][0], item['bbox'][1], + item['bbox'][2], item['bbox'][3]] for item in results], np.float32) # N x 4 + box_ious = self.bbox_overlaps_py(det_boxes, track_boxes) + + dets = np.array( + [det['ct'] + det['tracking'] for det in results], np.float32) # N x 2 + track_size = np.array([((track['bbox'][2] - track['bbox'][0]) * \ + (track['bbox'][3] - track['bbox'][1])) \ + for track in self.tracks], np.float32) # M + track_cat = np.array([track['class'] for track in self.tracks], np.int32) # M + item_size = np.array([((item['bbox'][2] - item['bbox'][0]) * \ + (item['bbox'][3] - item['bbox'][1])) \ + for item in results], np.float32) # N + item_cat = np.array([item['class'] for item in results], np.int32) # N + tracks = np.array( + [pre_det['ct'] for pre_det in self.tracks], np.float32) # M x 2 + dist = (((tracks.reshape(1, -1, 2) - \ + dets.reshape(-1, 1, 2)) ** 2).sum(axis=2)) # N x M + + if self.opt.dataset == 
'youtube_vis': + invalid = ((dist > track_size.reshape(1, M)) + \ + (dist > item_size.reshape(N, 1)) + (box_ious < self.opt.overlap_thresh)) > 0 + else: + invalid = ((dist > track_size.reshape(1, M)) + \ + (dist > item_size.reshape(N, 1)) + \ + (item_cat.reshape(N, 1) != track_cat.reshape(1, M)) + (box_ious < self.opt.overlap_thresh)) > 0 + dist = dist + invalid * 1e18 + + if self.opt.hungarian: + item_score = np.array([item['score'] for item in results], np.float32) # N + dist[dist > 1e18] = 1e18 + matched_indices = linear_assignment(dist) + else: + matched_indices = greedy_assignment(copy.deepcopy(dist)) + unmatched_dets = [d for d in range(dets.shape[0]) \ + if not (d in matched_indices[:, 0])] + unmatched_tracks = [d for d in range(tracks.shape[0]) \ + if not (d in matched_indices[:, 1])] + + if self.opt.hungarian: + matches = [] + for m in matched_indices: + if dist[m[0], m[1]] > 1e16: + unmatched_dets.append(m[0]) + unmatched_tracks.append(m[1]) + else: + matches.append(m) + matches = np.array(matches).reshape(-1, 2) + else: + matches = matched_indices + + ret = [] + for m in matches: + track = results[m[0]] + track['tracking_id'] = self.tracks[m[1]]['tracking_id'] + track['age'] = 1 + track['active'] = self.tracks[m[1]]['active'] + 1 + if 'embedding' in track: + self.alive.append(track['tracking_id']) + self.embedding_bank[self.tracks[m[1]]['tracking_id'] - 1, :] = self.alpha * track['embedding'] \ + + (1 - self.alpha) * self.embedding_bank[ + self.tracks[m[1]][ + 'tracking_id'] - 1, + :] + self.cat_bank[self.tracks[m[1]]['tracking_id'] - 1] = track['class'] + ret.append(track) + + if self.opt.public_det and len(unmatched_dets) > 0: + # Public detection: only create tracks from provided detections + pub_dets = np.array([d['ct'] for d in public_det], np.float32) + dist3 = ((dets.reshape(-1, 1, 2) - pub_dets.reshape(1, -1, 2)) ** 2).sum( + axis=2) + matched_dets = [d for d in range(dets.shape[0]) \ + if not (d in unmatched_dets)] + dist3[matched_dets] = 1e18 + for j in range(len(pub_dets)): + i = dist3[:, j].argmin() + if dist3[i, j] < item_size[i]: + dist3[i, :] = 1e18 + track = results[i] + if track['score'] > self.opt.new_thresh: + self.id_count += 1 + track['tracking_id'] = self.id_count + track['age'] = 1 + track['active'] = 1 + ret.append(track) + else: + # Private detection: create tracks for all un-matched detections + for i in unmatched_dets: + track = results[i] + if track['score'] > self.opt.new_thresh: + if 'embedding' in track: + max_id, max_cos = self.get_similarity(track['embedding'], False, track['class']) + if max_cos >= 0.3 and self.tracklet_ages[max_id - 1] < self.opt.window_size: + track['tracking_id'] = max_id + track['age'] = 1 + track['active'] = 1 + self.embedding_bank[track['tracking_id'] - 1, :] = self.alpha * track['embedding'] \ + + (1 - self.alpha) * self.embedding_bank[track['tracking_id'] - 1,:] + else: + self.id_count += 1 + track['tracking_id'] = self.id_count + track['age'] = 1 + track['active'] = 1 + self.embedding_bank[self.id_count - 1, :] = track['embedding'] + self.cat_bank[self.id_count - 1] = track['class'] + self.alive.append(track['tracking_id']) + ret.append(track) + else: + self.id_count += 1 + track['tracking_id'] = self.id_count + track['age'] = 1 + track['active'] = 1 + ret.append(track) + + self.tracklet_ages[:self.id_count] = self.tracklet_ages[:self.id_count] + 1 + for track in ret: + self.tracklet_ages[track['tracking_id'] - 1] = 1 + + + # second association + results_second = [item for item in results_with_low if item['score'] < 
self.opt.track_thresh] + self_tracks_second = [self.tracks[i] for i in unmatched_tracks if self.tracks[i]['active'] > 0] + second2original = [i for i in unmatched_tracks if self.tracks[i]['active'] > 0] + + N = len(results_second) + M = len(self_tracks_second) + + if N > 0 and M > 0: + + track_boxes_second = np.array([[track['bbox'][0], track['bbox'][1], + track['bbox'][2], track['bbox'][3]] for track in self_tracks_second], np.float32) # M x 4 + det_boxes_second = np.array([[item['bbox'][0], item['bbox'][1], + item['bbox'][2], item['bbox'][3]] for item in results_second], np.float32) # N x 4 + box_ious_second = self.bbox_overlaps_py(det_boxes_second, track_boxes_second) + + dets = np.array( + [det['ct'] + det['tracking'] for det in results_second], np.float32) # N x 2 + track_size = np.array([((track['bbox'][2] - track['bbox'][0]) * \ + (track['bbox'][3] - track['bbox'][1])) \ + for track in self_tracks_second], np.float32) # M + track_cat = np.array([track['class'] for track in self_tracks_second], np.int32) # M + item_size = np.array([((item['bbox'][2] - item['bbox'][0]) * \ + (item['bbox'][3] - item['bbox'][1])) \ + for item in results_second], np.float32) # N + item_cat = np.array([item['class'] for item in results_second], np.int32) # N + tracks_second = np.array( + [pre_det['ct'] for pre_det in self_tracks_second], np.float32) # M x 2 + dist = (((tracks_second.reshape(1, -1, 2) - \ + dets.reshape(-1, 1, 2)) ** 2).sum(axis=2)) # N x M + + invalid = ((dist > track_size.reshape(1, M)) + \ + (dist > item_size.reshape(N, 1)) + \ + (item_cat.reshape(N, 1) != track_cat.reshape(1, M)) + (box_ious_second < 0.3)) > 0 + dist = dist + invalid * 1e18 + + matched_indices_second = greedy_assignment(copy.deepcopy(dist), 1e8) + unmatched_tracks_second = [d for d in range(tracks_second.shape[0]) \ + if not (d in matched_indices_second[:, 1])] + matches_second = matched_indices_second + + for m in matches_second: + track = results_second[m[0]] + track['tracking_id'] = self_tracks_second[m[1]]['tracking_id'] + track['age'] = 1 + track['active'] = self_tracks_second[m[1]]['active'] + 1 + if 'embedding' in track: + self.alive.append(track['tracking_id']) + self.embedding_bank[self_tracks_second[m[1]]['tracking_id'] - 1, :] = self.alpha * track['embedding'] \ + + (1 - self.alpha) * self.embedding_bank[self_tracks_second[m[1]]['tracking_id'] - 1,:] + self.cat_bank[self_tracks_second[m[1]]['tracking_id'] - 1] = track['class'] + ret.append(track) + + unmatched_tracks = [second2original[i] for i in unmatched_tracks_second] + \ + [i for i in unmatched_tracks if self.tracks[i]['active'] == 0] + + + # Never used + for i in unmatched_tracks: + track = self.tracks[i] + if track['age'] < self.opt.max_age: + track['age'] += 1 + track['active'] = 1 # 0 + bbox = track['bbox'] + ct = track['ct'] + v = [0, 0] + track['bbox'] = [ + bbox[0] + v[0], bbox[1] + v[1], + bbox[2] + v[0], bbox[3] + v[1]] + track['ct'] = [ct[0] + v[0], ct[1] + v[1]] + ret.append(track) + for r_ in ret: + del r_['embedding'] + self.tracks = ret + return ret + + def get_similarity(self, feat, stat, cls): + max_id = -1 + max_cos = -1 + if stat: + nID = self.id_count + else: + nID = self.id_count + + a = feat[None, :] + b = self.embedding_bank[:nID, :] + if len(b) > 0: + alive = np.array(self.alive, dtype=np.int) - 1 + cosim = cosine(a, b) + cosim = np.reshape(cosim, newshape=(-1)) + cosim[alive] = -2 + cosim[nID - 1] = -2 + cosim[np.where(self.cat_bank[:nID] != cls)[0]] = -2 + max_id = int(np.argmax(cosim) + 1) + max_cos = np.max(cosim) + return 
max_id, max_cos + + def bbox_overlaps_py(self, boxes, query_boxes): + """ + determine overlaps between boxes and query_boxes + :param boxes: n * 4 bounding boxes + :param query_boxes: k * 4 bounding boxes + :return: overlaps: n * k overlaps + """ + n_ = boxes.shape[0] + k_ = query_boxes.shape[0] + overlaps = np.zeros((n_, k_), dtype=np.float) + for k in range(k_): + query_box_area = (query_boxes[k, 2] - query_boxes[k, 0] + 1) * (query_boxes[k, 3] - query_boxes[k, 1] + 1) + for n in range(n_): + iw = min(boxes[n, 2], query_boxes[k, 2]) - max(boxes[n, 0], query_boxes[k, 0]) + 1 + if iw > 0: + ih = min(boxes[n, 3], query_boxes[k, 3]) - max(boxes[n, 1], query_boxes[k, 1]) + 1 + if ih > 0: + box_area = (boxes[n, 2] - boxes[n, 0] + 1) * (boxes[n, 3] - boxes[n, 1] + 1) + all_area = float(box_area + query_box_area - iw * ih) + overlaps[n, k] = iw * ih / all_area + return overlaps + + + +def greedy_assignment(dist, thresh=1e16): + matched_indices = [] + if dist.shape[1] == 0: + return np.array(matched_indices, np.int32).reshape(-1, 2) + for i in range(dist.shape[0]): + j = dist[i].argmin() + if dist[i][j] < thresh: + dist[:, j] = 1e18 + matched_indices.append([i, j]) + return np.array(matched_indices, np.int32).reshape(-1, 2) diff --git a/tutorials/transtrack/README.md b/tutorials/transtrack/README.md new file mode 100644 index 0000000000000000000000000000000000000000..193965abc7c18906bf8072e034448c8fd6e5aab3 --- /dev/null +++ b/tutorials/transtrack/README.md @@ -0,0 +1,45 @@ +# TransTrack + +Step1. git clone https://github.com/PeizeSun/TransTrack.git + + +Step2. + +replace https://github.com/PeizeSun/TransTrack/blob/main/models/tracker.py + +Step3. + +Download TransTrack pretrained model: [671mot17_crowdhuman_mot17.pth](https://drive.google.com/drive/folders/1DjPL8xWoXDASrxgsA3O06EspJRdUXFQ-?usp=sharing) + + +Step3. run +``` +python3 main_track.py --output_dir . --dataset_file mot --coco_path mot --batch_size 1 --resume pretrained/671mot17_crowdhuman_mot17.pth --eval --with_box_refine --num_queries 500 +``` + + +# TransTrack_BYTE + +Step1. git clone https://github.com/PeizeSun/TransTrack.git + +Step2. + +replace https://github.com/PeizeSun/TransTrack/blob/main/models/save_track.py + +replace https://github.com/PeizeSun/TransTrack/blob/main/engine_track.py + +replace https://github.com/PeizeSun/TransTrack/blob/main/main_track.py + +add mot_online to https://github.com/PeizeSun/TransTrack + +Step3. run +``` +python3 main_track.py --output_dir . --dataset_file mot --coco_path mot --batch_size 1 --resume pretrained/671mot17_crowdhuman_mot17.pth --eval --with_box_refine --num_queries 500 +``` + + +## Notes +tracker.py: only motion + +mot_online/byte_tracker.py: motion with kalman filter + diff --git a/tutorials/transtrack/engine_track.py b/tutorials/transtrack/engine_track.py new file mode 100644 index 0000000000000000000000000000000000000000..925a4f2ba1b6ee4e7daaed7c0a901362ee223ddb --- /dev/null +++ b/tutorials/transtrack/engine_track.py @@ -0,0 +1,277 @@ +# Modified by Peize Sun, Rufeng Zhang +# ------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# ------------------------------------------------------------------------ +""" +Train and eval functions used in main.py +""" +import math +import os +import sys +from typing import Iterable + +import torch +import util.misc as utils +from datasets.coco_eval import CocoEvaluator +from datasets.panoptic_eval import PanopticEvaluator +from datasets.data_prefetcher import data_prefetcher +from mot_online.byte_tracker import BYTETracker + + +def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module, + data_loader: Iterable, optimizer: torch.optim.Optimizer, + device: torch.device, epoch: int, max_norm: float = 0): + model.train() + criterion.train() + metric_logger = utils.MetricLogger(delimiter=" ") + metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) + metric_logger.add_meter('class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}')) + metric_logger.add_meter('grad_norm', utils.SmoothedValue(window_size=1, fmt='{value:.2f}')) + header = 'Epoch: [{}]'.format(epoch) + print_freq = 10 + + prefetcher = data_prefetcher(data_loader, device, prefetch=True) + samples, targets = prefetcher.next() + + # for samples, targets in metric_logger.log_every(data_loader, print_freq, header): + for _ in metric_logger.log_every(range(len(data_loader)), print_freq, header): + outputs, pre_outputs, pre_targets = model([samples, targets]) + loss_dict = criterion(outputs, targets, pre_outputs, pre_targets) + weight_dict = criterion.weight_dict + losses = sum(loss_dict[k] * weight_dict[k] for k in loss_dict.keys() if k in weight_dict) + + # reduce losses over all GPUs for logging purposes + loss_dict_reduced = utils.reduce_dict(loss_dict) + loss_dict_reduced_unscaled = {f'{k}_unscaled': v + for k, v in loss_dict_reduced.items()} + loss_dict_reduced_scaled = {k: v * weight_dict[k] + for k, v in loss_dict_reduced.items() if k in weight_dict} + losses_reduced_scaled = sum(loss_dict_reduced_scaled.values()) + + loss_value = losses_reduced_scaled.item() + + if not math.isfinite(loss_value): + print("Loss is {}, stopping training".format(loss_value)) + print(loss_dict_reduced) + sys.exit(1) + + optimizer.zero_grad() + losses.backward() + if max_norm > 0: + grad_total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm) + else: + grad_total_norm = utils.get_total_grad_norm(model.parameters(), max_norm) + optimizer.step() + + metric_logger.update(loss=loss_value, **loss_dict_reduced_scaled, **loss_dict_reduced_unscaled) + metric_logger.update(class_error=loss_dict_reduced['class_error']) + metric_logger.update(lr=optimizer.param_groups[0]["lr"]) + metric_logger.update(grad_norm=grad_total_norm) + + samples, targets = prefetcher.next() + # gather the stats from all processes + metric_logger.synchronize_between_processes() + print("Averaged stats:", metric_logger) + return {k: meter.global_avg for k, meter in metric_logger.meters.items()} + + +@torch.no_grad() +def evaluate(model, criterion, postprocessors, data_loader, base_ds, device, output_dir, tracker=None, + phase='train', det_val=False): + model.eval() + criterion.eval() + + metric_logger = utils.MetricLogger(delimiter=" ") + metric_logger.add_meter('class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}')) + header = 'Test:' + + iou_types = tuple(k for k in ('segm', 'bbox') if k in postprocessors.keys()) + coco_evaluator = CocoEvaluator(base_ds, iou_types) + # coco_evaluator.coco_eval[iou_types[0]].params.iouThrs = [0, 0.1, 0.5, 0.75] + + panoptic_evaluator = None 
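+ # When a tracker is attached, evaluation runs one video at a time: inference
+ # uses batch size 1, the tracker is reset at the start of each sequence
+ # (frame_id == 1), and the previous frame's embedding (pre_embed) is fed back
+ # into the model together with the current samples.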
+ if 'panoptic' in postprocessors.keys(): + panoptic_evaluator = PanopticEvaluator( + data_loader.dataset.ann_file, + data_loader.dataset.ann_folder, + output_dir=os.path.join(output_dir, "panoptic_eval"), + ) + + res_tracks = dict() + pre_embed = None + for samples, targets in metric_logger.log_every(data_loader, 10, header): + # pre process for track. + if tracker is not None: + if phase != 'train': + assert samples.tensors.shape[0] == 1, "Now only support inference of batchsize 1." + frame_id = targets[0].get("frame_id", None) + assert frame_id is not None + frame_id = frame_id.item() + if frame_id == 1: + tracker.reset_all() + pre_embed = None + + samples = samples.to(device) + targets = [{k: v.to(device) for k, v in t.items()} for t in targets] + + if det_val: + outputs = model(samples) + else: + outputs, pre_embed = model(samples, pre_embed) + loss_dict = criterion(outputs, targets) + weight_dict = criterion.weight_dict + +# reduce losses over all GPUs for logging purposes + loss_dict_reduced = utils.reduce_dict(loss_dict) + loss_dict_reduced_scaled = {k: v * weight_dict[k] + for k, v in loss_dict_reduced.items() if k in weight_dict} + loss_dict_reduced_unscaled = {f'{k}_unscaled': v + for k, v in loss_dict_reduced.items()} + metric_logger.update(loss=sum(loss_dict_reduced_scaled.values()), + **loss_dict_reduced_scaled, + **loss_dict_reduced_unscaled) + metric_logger.update(class_error=loss_dict_reduced['class_error']) + + orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0) + results = postprocessors['bbox'](outputs, orig_target_sizes) + + if 'segm' in postprocessors.keys(): + target_sizes = torch.stack([t["size"] for t in targets], dim=0) + results = postprocessors['segm'](results, outputs, orig_target_sizes, target_sizes) + res = {target['image_id'].item(): output for target, output in zip(targets, results)} + + # post process for track. 
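# Editor's note (illustrative sketch, not part of the original file): the tracker below
# consumes the post-processed detections of the single image in the batch. Each entry of
# `results` is a dict with "scores", "labels" and "boxes" (x1, y1, x2, y2 in the original
# image resolution), which is the structure that init_track()/step() and
# BYTETracker.update() read from. A hypothetical minimal input, for illustration only:
#
#   results[0] = {"scores": torch.tensor([0.91, 0.35]),
#                 "labels": torch.tensor([1, 1]),
#                 "boxes":  torch.tensor([[10., 20., 50., 120.],
#                                         [48., 22., 90., 118.]])}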
+ if tracker is not None: + if frame_id == 1: + res_track = tracker.init_track(results[0]) + else: + res_track = tracker.step(results[0]) + res_tracks[targets[0]['image_id'].item()] = res_track + + if coco_evaluator is not None: + coco_evaluator.update(res) + + if panoptic_evaluator is not None: + res_pano = postprocessors["panoptic"](outputs, target_sizes, orig_target_sizes) + for i, target in enumerate(targets): + image_id = target["image_id"].item() + file_name = f"{image_id:012d}.png" + res_pano[i]["image_id"] = image_id + res_pano[i]["file_name"] = file_name + + panoptic_evaluator.update(res_pano) + + # gather the stats from all processes + metric_logger.synchronize_between_processes() + print("Averaged stats:", metric_logger) + if coco_evaluator is not None: + coco_evaluator.synchronize_between_processes() + if panoptic_evaluator is not None: + panoptic_evaluator.synchronize_between_processes() + + # accumulate predictions from all images + if coco_evaluator is not None: + coco_evaluator.accumulate() + coco_evaluator.summarize() + panoptic_res = None + if panoptic_evaluator is not None: + panoptic_res = panoptic_evaluator.summarize() + stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()} + if coco_evaluator is not None: + if 'bbox' in postprocessors.keys(): + stats['coco_eval_bbox'] = coco_evaluator.coco_eval['bbox'].stats.tolist() + if 'segm' in postprocessors.keys(): + stats['coco_eval_masks'] = coco_evaluator.coco_eval['segm'].stats.tolist() + if panoptic_res is not None: + stats['PQ_all'] = panoptic_res["All"] + stats['PQ_th'] = panoptic_res["Things"] + stats['PQ_st'] = panoptic_res["Stuff"] + return stats, coco_evaluator, res_tracks + + +@torch.no_grad() +def evaluate_track(args, model, criterion, postprocessors, data_loader, base_ds, device, output_dir, tracker=None, + phase='train', det_val=False): + model.eval() + criterion.eval() + + metric_logger = utils.MetricLogger(delimiter=" ") + metric_logger.add_meter('class_error', utils.SmoothedValue(window_size=1, fmt='{value:.2f}')) + header = 'Test:' + + iou_types = tuple(k for k in ('segm', 'bbox') if k in postprocessors.keys()) + coco_evaluator = CocoEvaluator(base_ds, iou_types) + # coco_evaluator.coco_eval[iou_types[0]].params.iouThrs = [0, 0.1, 0.5, 0.75] + + res_tracks = dict() + pre_embed = None + for samples, targets in metric_logger.log_every(data_loader, 50, header): + # pre process for track. 
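# Editor's note (illustrative sketch, not part of the original file): unlike evaluate()
# above, this loop does not call reset_all(); a brand-new BYTETracker is constructed
# whenever a sequence restarts (frame_id == 1), which discards the tracked/lost/removed
# lists and the Kalman state. Track ids still keep growing across videos because
# BaseTrack._count is a class attribute. The control flow, roughly:
#
#   if targets[0]["frame_id"].item() == 1:
#       tracker = BYTETracker(args)         # fresh per-sequence state
#       pre_embed = None                    # stop chaining embeddings across videos
#   res_track = tracker.update(results[0])  # BYTE two-stage association, see byte_tracker.py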
+ if tracker is not None: + frame_id = targets[0].get("frame_id", None) + assert frame_id is not None + frame_id = frame_id.item() + if frame_id == 1: + tracker = BYTETracker(args) + pre_embed = None + + samples = samples.to(device) + targets = [{k: v.to(device) for k, v in t.items()} for t in targets] + + if det_val: + outputs = model(samples) + else: + outputs, pre_embed = model(samples, pre_embed) + loss_dict = criterion(outputs, targets) + weight_dict = criterion.weight_dict + +# reduce losses over all GPUs for logging purposes + loss_dict_reduced = utils.reduce_dict(loss_dict) + loss_dict_reduced_scaled = {k: v * weight_dict[k] + for k, v in loss_dict_reduced.items() if k in weight_dict} + loss_dict_reduced_unscaled = {f'{k}_unscaled': v + for k, v in loss_dict_reduced.items()} + metric_logger.update(loss=sum(loss_dict_reduced_scaled.values()), + **loss_dict_reduced_scaled, + **loss_dict_reduced_unscaled) + metric_logger.update(class_error=loss_dict_reduced['class_error']) + + orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0) + results = postprocessors['bbox'](outputs, orig_target_sizes) + + if 'segm' in postprocessors.keys(): + target_sizes = torch.stack([t["size"] for t in targets], dim=0) + results = postprocessors['segm'](results, outputs, orig_target_sizes, target_sizes) + res = {target['image_id'].item(): output for target, output in zip(targets, results)} + + # post process for track. + if tracker is not None: + res_track = tracker.update(results[0]) + res_tracks[targets[0]['image_id'].item()] = res_track + + if coco_evaluator is not None: + coco_evaluator.update(res) + + # gather the stats from all processes + metric_logger.synchronize_between_processes() + print("Averaged stats:", metric_logger) + if coco_evaluator is not None: + coco_evaluator.synchronize_between_processes() + + # accumulate predictions from all images + if coco_evaluator is not None: + coco_evaluator.accumulate() + coco_evaluator.summarize() + + stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()} + if coco_evaluator is not None: + if 'bbox' in postprocessors.keys(): + stats['coco_eval_bbox'] = coco_evaluator.coco_eval['bbox'].stats.tolist() + if 'segm' in postprocessors.keys(): + stats['coco_eval_masks'] = coco_evaluator.coco_eval['segm'].stats.tolist() + return stats, coco_evaluator, res_tracks \ No newline at end of file diff --git a/tutorials/transtrack/main_track.py b/tutorials/transtrack/main_track.py new file mode 100644 index 0000000000000000000000000000000000000000..efe1a0ac1fd5b26aee2d9d6582ce25e441786080 --- /dev/null +++ b/tutorials/transtrack/main_track.py @@ -0,0 +1,375 @@ +# Modified by Peize Sun, Rufeng Zhang +# ------------------------------------------------------------------------ +# Deformable DETR +# Copyright (c) 2020 SenseTime. All Rights Reserved. +# Licensed under the Apache License, Version 2.0 [see LICENSE for details] +# ------------------------------------------------------------------------ +# Modified from DETR (https://github.com/facebookresearch/detr) +# Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved +# ------------------------------------------------------------------------ +import argparse +import datetime +import json +import random +import time +from pathlib import Path + +import numpy as np +import torch +from torch.utils.data import DataLoader +import datasets +import util.misc as utils +import datasets.samplers as samplers +from datasets import build_dataset, get_coco_api_from_dataset +from engine_track import evaluate, train_one_epoch, evaluate_track +from models import build_tracktrain_model, build_tracktest_model, build_model +from models import Tracker +from models import save_track +from mot_online.byte_tracker import BYTETracker + +from collections import defaultdict + + +def get_args_parser(): + parser = argparse.ArgumentParser('Deformable DETR Detector', add_help=False) + parser.add_argument('--lr', default=2e-4, type=float) + parser.add_argument('--lr_backbone_names', default=["backbone.0"], type=str, nargs='+') + parser.add_argument('--lr_backbone', default=2e-5, type=float) + parser.add_argument('--lr_linear_proj_names', default=['reference_points', 'sampling_offsets'], type=str, nargs='+') + parser.add_argument('--lr_linear_proj_mult', default=0.1, type=float) + parser.add_argument('--batch_size', default=1, type=int) + parser.add_argument('--weight_decay', default=1e-4, type=float) + parser.add_argument('--epochs', default=50, type=int) + parser.add_argument('--lr_drop', default=40, type=int) + parser.add_argument('--lr_drop_epochs', default=None, type=int, nargs='+') + parser.add_argument('--clip_max_norm', default=0.1, type=float, + help='gradient clipping max norm') + + parser.add_argument('--sgd', action='store_true') + + # Variants of Deformable DETR + parser.add_argument('--with_box_refine', default=True, action='store_true') + parser.add_argument('--two_stage', default=False, action='store_true') + + # Model parameters + parser.add_argument('--frozen_weights', type=str, default=None, + help="Path to the pretrained model. 
If set, only the mask head will be trained") + + # * Backbone + parser.add_argument('--backbone', default='resnet50', type=str, + help="Name of the convolutional backbone to use") + parser.add_argument('--dilation', action='store_true', + help="If true, we replace stride with dilation in the last convolutional block (DC5)") + parser.add_argument('--position_embedding', default='sine', type=str, choices=('sine', 'learned'), + help="Type of positional embedding to use on top of the image features") + parser.add_argument('--position_embedding_scale', default=2 * np.pi, type=float, + help="position / size * scale") + parser.add_argument('--num_feature_levels', default=4, type=int, help='number of feature levels') + + # * Transformer + parser.add_argument('--enc_layers', default=6, type=int, + help="Number of encoding layers in the transformer") + parser.add_argument('--dec_layers', default=6, type=int, + help="Number of decoding layers in the transformer") + parser.add_argument('--dim_feedforward', default=1024, type=int, + help="Intermediate size of the feedforward layers in the transformer blocks") + parser.add_argument('--hidden_dim', default=256, type=int, + help="Size of the embeddings (dimension of the transformer)") + parser.add_argument('--dropout', default=0.1, type=float, + help="Dropout applied in the transformer") + parser.add_argument('--nheads', default=8, type=int, + help="Number of attention heads inside the transformer's attentions") + parser.add_argument('--num_queries', default=500, type=int, + help="Number of query slots") + parser.add_argument('--dec_n_points', default=4, type=int) + parser.add_argument('--enc_n_points', default=4, type=int) + + # * Segmentation + parser.add_argument('--masks', action='store_true', + help="Train segmentation head if the flag is provided") + + # Loss + parser.add_argument('--no_aux_loss', dest='aux_loss', action='store_false', + help="Disables auxiliary decoding losses (loss at each layer)") + + # * Matcher + parser.add_argument('--set_cost_class', default=2, type=float, + help="Class coefficient in the matching cost") + parser.add_argument('--set_cost_bbox', default=5, type=float, + help="L1 box coefficient in the matching cost") + parser.add_argument('--set_cost_giou', default=2, type=float, + help="giou box coefficient in the matching cost") + + # * Loss coefficients + parser.add_argument('--mask_loss_coef', default=1, type=float) + parser.add_argument('--dice_loss_coef', default=1, type=float) + parser.add_argument('--cls_loss_coef', default=2, type=float) + parser.add_argument('--bbox_loss_coef', default=5, type=float) + parser.add_argument('--giou_loss_coef', default=2, type=float) + parser.add_argument('--focal_alpha', default=0.25, type=float) + parser.add_argument('--id_loss_coef', default=1, type=float) + + # dataset parameters + parser.add_argument('--dataset_file', default='coco') + parser.add_argument('--coco_path', default='./data/coco', type=str) + parser.add_argument('--coco_panoptic_path', type=str) + parser.add_argument('--remove_difficult', action='store_true') + + parser.add_argument('--output_dir', default='', + help='path where to save, empty for no saving') + parser.add_argument('--device', default='cuda', + help='device to use for training / testing') + parser.add_argument('--seed', default=42, type=int) + parser.add_argument('--resume', default='', help='resume from checkpoint') + parser.add_argument('--start_epoch', default=0, type=int, metavar='N', + help='start epoch') + parser.add_argument('--eval', 
action='store_true') + parser.add_argument('--num_workers', default=2, type=int) + parser.add_argument('--cache_mode', default=False, action='store_true', help='whether to cache images on memory') + + # PyTorch checkpointing for saving memory (torch.utils.checkpoint.checkpoint) + parser.add_argument('--checkpoint_enc_ffn', default=False, action='store_true') + parser.add_argument('--checkpoint_dec_ffn', default=False, action='store_true') + + # appended for track. + parser.add_argument('--track_train_split', default='train', type=str) + parser.add_argument('--track_eval_split', default='val', type=str) + parser.add_argument('--track_thresh', default=0.4, type=float) + parser.add_argument('--reid_shared', default=False, type=bool) + parser.add_argument('--reid_dim', default=128, type=int) + parser.add_argument('--num_ids', default=360, type=int) + + + # detector for track. + parser.add_argument('--det_val', default=False, action='store_true') + + + return parser + + +def main(args): + utils.init_distributed_mode(args) + print("git:\n {}\n".format(utils.get_sha())) + + if args.frozen_weights is not None: + assert args.masks, "Frozen training is meant for segmentation only" + print(args) + + device = torch.device(args.device) + + # fix the seed for reproducibility + seed = args.seed + utils.get_rank() + torch.manual_seed(seed) + np.random.seed(seed) + random.seed(seed) + + if args.det_val: + assert args.eval, 'only support eval mode of detector for track' + model, criterion, postprocessors = build_model(args) + elif args.eval: + model, criterion, postprocessors = build_tracktest_model(args) + else: + model, criterion, postprocessors = build_tracktrain_model(args) + + model.to(device) + + model_without_ddp = model + n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad) + print('number of params:', n_parameters) + + dataset_train = build_dataset(image_set=args.track_train_split, args=args) + dataset_val = build_dataset(image_set=args.track_eval_split, args=args) + + if args.distributed: + if args.cache_mode: + sampler_train = samplers.NodeDistributedSampler(dataset_train) + sampler_val = samplers.NodeDistributedSampler(dataset_val, shuffle=False) + else: + sampler_train = samplers.DistributedSampler(dataset_train) + sampler_val = samplers.DistributedSampler(dataset_val, shuffle=False) + else: + sampler_train = torch.utils.data.RandomSampler(dataset_train) + sampler_val = torch.utils.data.SequentialSampler(dataset_val) + + batch_sampler_train = torch.utils.data.BatchSampler( + sampler_train, args.batch_size, drop_last=True) + + data_loader_train = DataLoader(dataset_train, batch_sampler=batch_sampler_train, + collate_fn=utils.collate_fn, num_workers=args.num_workers, + pin_memory=True) + data_loader_val = DataLoader(dataset_val, args.batch_size, sampler=sampler_val, + drop_last=False, collate_fn=utils.collate_fn, num_workers=args.num_workers, + pin_memory=True) + + # lr_backbone_names = ["backbone.0", "backbone.neck", "input_proj", "transformer.encoder"] + def match_name_keywords(n, name_keywords): + out = False + for b in name_keywords: + if b in n: + out = True + break + return out + + for n, p in model_without_ddp.named_parameters(): + print(n) + + param_dicts = [ + { + "params": + [p for n, p in model_without_ddp.named_parameters() + if not match_name_keywords(n, args.lr_backbone_names) and not match_name_keywords(n, args.lr_linear_proj_names) and p.requires_grad], + "lr": args.lr, + }, + { + "params": [p for n, p in model_without_ddp.named_parameters() if 
match_name_keywords(n, args.lr_backbone_names) and p.requires_grad], + "lr": args.lr_backbone, + }, + { + "params": [p for n, p in model_without_ddp.named_parameters() if match_name_keywords(n, args.lr_linear_proj_names) and p.requires_grad], + "lr": args.lr * args.lr_linear_proj_mult, + } + ] + if args.sgd: + optimizer = torch.optim.SGD(param_dicts, lr=args.lr, momentum=0.9, + weight_decay=args.weight_decay) + else: + optimizer = torch.optim.AdamW(param_dicts, lr=args.lr, + weight_decay=args.weight_decay) + lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.lr_drop) + + if args.distributed: + model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], find_unused_parameters=True) + model_without_ddp = model.module + + if args.dataset_file == "coco_panoptic": + # We also evaluate AP during panoptic training, on original coco DS + coco_val = datasets.coco.build("val", args) + base_ds = get_coco_api_from_dataset(coco_val) + else: + base_ds = get_coco_api_from_dataset(dataset_val) + + if args.frozen_weights is not None: + checkpoint = torch.load(args.frozen_weights, map_location='cpu') + model_without_ddp.detr.load_state_dict(checkpoint['model']) + + output_dir = Path(args.output_dir) + if args.resume: + if args.resume.startswith('https'): + checkpoint = torch.hub.load_state_dict_from_url( + args.resume, map_location='cpu', check_hash=True) + else: + checkpoint = torch.load(args.resume, map_location='cpu') + missing_keys, unexpected_keys = model_without_ddp.load_state_dict(checkpoint['model'], strict=False) + unexpected_keys = [k for k in unexpected_keys if not (k.endswith('total_params') or k.endswith('total_ops'))] + if len(missing_keys) > 0: + print('Missing Keys: {}'.format(missing_keys)) + if len(unexpected_keys) > 0: + print('Unexpected Keys: {}'.format(unexpected_keys)) + if not args.eval and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint: + import copy + p_groups = copy.deepcopy(optimizer.param_groups) + optimizer.load_state_dict(checkpoint['optimizer']) + for pg, pg_old in zip(optimizer.param_groups, p_groups): + pg['lr'] = pg_old['lr'] + pg['initial_lr'] = pg_old['initial_lr'] + print(optimizer.param_groups) + lr_scheduler.load_state_dict(checkpoint['lr_scheduler']) + # todo: this is a hack for doing experiment that resume from checkpoint and also modify lr scheduler (e.g., decrease lr in advance). 
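# Editor's note (illustrative sketch, not part of the original file):
# lr_scheduler.load_state_dict() above restores the step_size and base_lrs stored in the
# checkpoint, so a different --lr_drop passed on resume would otherwise be silently
# ignored. The hack below patches the resumed StepLR in place and re-evaluates it at the
# restored epoch; it is roughly equivalent to:
#
#   lr_scheduler.step_size = args.lr_drop
#   lr_scheduler.base_lrs  = [g['initial_lr'] for g in optimizer.param_groups]
#   lr_scheduler.step(lr_scheduler.last_epoch)   # recompute the lr for the current epoch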
+ args.override_resumed_lr_drop = True + if args.override_resumed_lr_drop: + print('Warning: (hack) args.override_resumed_lr_drop is set to True, so args.lr_drop would override lr_drop in resumed lr_scheduler.') + lr_scheduler.step_size = args.lr_drop + lr_scheduler.base_lrs = list(map(lambda group: group['initial_lr'], optimizer.param_groups)) + lr_scheduler.step(lr_scheduler.last_epoch) + args.start_epoch = checkpoint['epoch'] + 1 + # check the resumed model +# if not args.eval: +# test_stats, coco_evaluator, _ = evaluate( +# model, criterion, postprocessors, data_loader_val, base_ds, device, args.output_dir +# ) + + if args.eval: + assert args.batch_size == 1, print("Now only support 1.") + # tracker = MOTXTracker(score_thresh=args.track_thresh) + # test_stats, coco_evaluator, res_tracks = evaluate(model, criterion, postprocessors, data_loader_val, + # base_ds, device, args.output_dir, tracker=tracker, + # phase='eval', det_val=args.det_val) + tracker = BYTETracker(args) + test_stats, coco_evaluator, res_tracks = evaluate_track(args, model, criterion, postprocessors, data_loader_val, + base_ds, device, args.output_dir, tracker=tracker, + phase='eval', det_val=args.det_val) + if args.output_dir: + utils.save_on_master(coco_evaluator.coco_eval["bbox"].eval, output_dir / "eval.pth") + if res_tracks is not None: + print("Creating video index for {}.".format(args.dataset_file)) + video_to_images = defaultdict(list) + video_names = defaultdict() + for _, info in dataset_val.coco.imgs.items(): + video_to_images[info["video_id"]].append({"image_id": info["id"], + "frame_id": info["frame_id"]}) + video_name = info["file_name"].split("/")[0] + if video_name not in video_names: + video_names[info["video_id"]] = video_name + assert len(video_to_images) == len(video_names) + # save mot results. 
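# Editor's note (illustrative sketch, not part of the original file): save_track()
# (tutorials/transtrack/save_track.py) writes one <video_name>.txt per sequence in the
# MOTChallenge layout frame,id,x,y,w,h,conf,x,y,z, with the confidence and world
# coordinates fixed to -1 and only tracks flagged active kept. A hypothetical output line:
#
#   42,7,561.30,178.90,55.40,130.10,-1,-1,-1,-1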
+ save_track(res_tracks, args.output_dir, video_to_images, video_names, args.track_eval_split) + + return + + print("Start training") + start_time = time.time() + for epoch in range(args.start_epoch, args.epochs): + if args.distributed: + sampler_train.set_epoch(epoch) + train_stats = train_one_epoch( + model, criterion, data_loader_train, optimizer, device, epoch, args.clip_max_norm) + lr_scheduler.step() + if args.output_dir: + checkpoint_paths = [output_dir / 'checkpoint.pth'] + # extra checkpoint before LR drop and every 5 epochs + if (epoch + 1) % args.lr_drop == 0 or (epoch + 1) % 5 == 0: + checkpoint_paths.append(output_dir / f'checkpoint{epoch:04}.pth') + for checkpoint_path in checkpoint_paths: + utils.save_on_master({ + 'model': model_without_ddp.state_dict(), + 'optimizer': optimizer.state_dict(), + 'lr_scheduler': lr_scheduler.state_dict(), + 'epoch': epoch, + 'args': args, + }, checkpoint_path) + if epoch % 10 == 0 or epoch > args.epochs - 5: + test_stats, coco_evaluator, _ = evaluate( + model, criterion, postprocessors, data_loader_val, base_ds, device, args.output_dir, + ) + + log_stats = {**{f'train_{k}': v for k, v in train_stats.items()}, + **{f'test_{k}': v for k, v in test_stats.items()}, + 'epoch': epoch, + 'n_parameters': n_parameters} + + if args.output_dir and utils.is_main_process(): + with (output_dir / "log.txt").open("a") as f: + f.write(json.dumps(log_stats) + "\n") + + # for evaluation logs + if coco_evaluator is not None: + (output_dir / 'eval').mkdir(exist_ok=True) + if "bbox" in coco_evaluator.coco_eval: + filenames = ['latest.pth'] + if epoch % 50 == 0: + filenames.append(f'{epoch:03}.pth') + for name in filenames: + torch.save(coco_evaluator.coco_eval["bbox"].eval, + output_dir / "eval" / name) + + total_time = time.time() - start_time + total_time_str = str(datetime.timedelta(seconds=int(total_time))) + print('Training time {}'.format(total_time_str)) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser('Deformable DETR training and evaluation script', parents=[get_args_parser()]) + args = parser.parse_args() + if args.output_dir: + Path(args.output_dir).mkdir(parents=True, exist_ok=True) + main(args) diff --git a/tutorials/transtrack/mot_online/basetrack.py b/tutorials/transtrack/mot_online/basetrack.py new file mode 100644 index 0000000000000000000000000000000000000000..a7130b5cc08ac55705c155594d0f2a1d09f96774 --- /dev/null +++ b/tutorials/transtrack/mot_online/basetrack.py @@ -0,0 +1,52 @@ +import numpy as np +from collections import OrderedDict + + +class TrackState(object): + New = 0 + Tracked = 1 + Lost = 2 + Removed = 3 + + +class BaseTrack(object): + _count = 0 + + track_id = 0 + is_activated = False + state = TrackState.New + + history = OrderedDict() + features = [] + curr_feature = None + score = 0 + start_frame = 0 + frame_id = 0 + time_since_update = 0 + + # multi-camera + location = (np.inf, np.inf) + + @property + def end_frame(self): + return self.frame_id + + @staticmethod + def next_id(): + BaseTrack._count += 1 + return BaseTrack._count + + def activate(self, *args): + raise NotImplementedError + + def predict(self): + raise NotImplementedError + + def update(self, *args, **kwargs): + raise NotImplementedError + + def mark_lost(self): + self.state = TrackState.Lost + + def mark_removed(self): + self.state = TrackState.Removed \ No newline at end of file diff --git a/tutorials/transtrack/mot_online/byte_tracker.py b/tutorials/transtrack/mot_online/byte_tracker.py new file mode 100644 index 
0000000000000000000000000000000000000000..be6e179a5d7ab5fb7a276cda82679ce9656f29f1 --- /dev/null +++ b/tutorials/transtrack/mot_online/byte_tracker.py @@ -0,0 +1,354 @@ +import numpy as np +from collections import deque +import os +import os.path as osp +import copy +import torch +import torch.nn.functional as F + +from .kalman_filter import KalmanFilter +from mot_online import matching +from .basetrack import BaseTrack, TrackState + +class STrack(BaseTrack): + shared_kalman = KalmanFilter() + def __init__(self, tlwh, score, buffer_size=30): + + # wait activate + self._tlwh = np.asarray(tlwh, dtype=np.float) + self.kalman_filter = None + self.mean, self.covariance = None, None + self.is_activated = False + + self.score = score + self.tracklet_len = 0 + + def predict(self): + mean_state = self.mean.copy() + if self.state != TrackState.Tracked: + mean_state[7] = 0 + self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) + + @staticmethod + def multi_predict(stracks): + if len(stracks) > 0: + multi_mean = np.asarray([st.mean.copy() for st in stracks]) + multi_covariance = np.asarray([st.covariance for st in stracks]) + for i, st in enumerate(stracks): + if st.state != TrackState.Tracked: + multi_mean[i][7] = 0 + multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance) + for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): + stracks[i].mean = mean + stracks[i].covariance = cov + + def activate(self, kalman_filter, frame_id): + """Start a new tracklet""" + self.kalman_filter = kalman_filter + self.track_id = self.next_id() + self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) + + self.tracklet_len = 0 + self.state = TrackState.Tracked + if frame_id == 1: + self.is_activated = True + #self.is_activated = True + self.frame_id = frame_id + self.start_frame = frame_id + + def re_activate(self, new_track, frame_id, new_id=False): + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) + ) + self.tracklet_len = 0 + self.state = TrackState.Tracked + self.is_activated = True + self.frame_id = frame_id + if new_id: + self.track_id = self.next_id() + self.score = new_track.score + + def update(self, new_track, frame_id): + """ + Update a matched track + :type new_track: STrack + :type frame_id: int + :type update_feature: bool + :return: + """ + self.frame_id = frame_id + self.tracklet_len += 1 + + new_tlwh = new_track.tlwh + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) + self.state = TrackState.Tracked + self.is_activated = True + + self.score = new_track.score + + @property + # @jit(nopython=True) + def tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + """ + if self.mean is None: + return self._tlwh.copy() + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + @property + # @jit(nopython=True) + def tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_xyah(tlwh): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. 
+ """ + ret = np.asarray(tlwh).copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + def to_xyah(self): + return self.tlwh_to_xyah(self.tlwh) + + @staticmethod + # @jit(nopython=True) + def tlbr_to_tlwh(tlbr): + ret = np.asarray(tlbr).copy() + ret[2:] -= ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_tlbr(tlwh): + ret = np.asarray(tlwh).copy() + ret[2:] += ret[:2] + return ret + + def __repr__(self): + return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) + + +class BYTETracker(object): + def __init__(self, args, frame_rate=30): + self.tracked_stracks = [] # type: list[STrack] + self.lost_stracks = [] # type: list[STrack] + self.removed_stracks = [] # type: list[STrack] + + self.frame_id = 0 + self.args = args + #self.det_thresh = args.track_thresh + self.det_thresh = args.track_thresh + 0.1 + self.buffer_size = int(frame_rate / 30.0 * 30) + self.max_time_lost = self.buffer_size + self.max_per_image = args.num_queries + self.kalman_filter = KalmanFilter() + + def update(self, output_results): + self.frame_id += 1 + activated_starcks = [] + refind_stracks = [] + lost_stracks = [] + removed_stracks = [] + + scores = output_results["scores"].cpu().numpy() + classes = output_results["labels"].cpu().numpy() + bboxes = output_results["boxes"].cpu().numpy() # x1y1x2y2 + + remain_inds = scores > self.args.track_thresh + inds_low = scores > 0.2 + inds_high = scores < self.args.track_thresh + inds_second = np.logical_and(inds_low, inds_high) + dets_second = bboxes[inds_second] + dets = bboxes[remain_inds] + scores_keep = scores[remain_inds] + scores_second = scores[inds_second] + + # vis + ''' + for i in range(0, dets.shape[0]): + bbox = dets[i][0:4] + cv2.rectangle(img0, (bbox[0], bbox[1]), + (bbox[2], bbox[3]), + (0, 255, 0), 2) + cv2.imshow('dets', img0) + cv2.waitKey(0) + id0 = id0-1 + ''' + + if len(dets) > 0: + '''Detections''' + detections = [STrack(STrack.tlbr_to_tlwh(tlbr), s, 30) for + (tlbr, s) in zip(dets, scores_keep)] + else: + detections = [] + + ''' Add newly detected tracklets to tracked_stracks''' + unconfirmed = [] + tracked_stracks = [] # type: list[STrack] + for track in self.tracked_stracks: + if not track.is_activated: + unconfirmed.append(track) + else: + tracked_stracks.append(track) + + ''' Step 2: First association, with Kalman and IOU''' + strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) + # Predict the current location with KF + STrack.multi_predict(strack_pool) + dists = matching.iou_distance(strack_pool, detections) + #dists = matching.fuse_motion(self.kalman_filter, dists, strack_pool, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.8) + + for itracked, idet in matches: + track = strack_pool[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(detections[idet], self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + ''' Step 3: Second association, with IOU''' + # association the untrack to the low score detections + if len(dets_second) > 0: + '''Detections''' + detections_second = [STrack(STrack.tlbr_to_tlwh(tlbr), s, 30) for + (tlbr, s) in zip(dets_second, scores_second)] + else: + detections_second = [] + r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] + dists = matching.iou_distance(r_tracked_stracks, detections_second) + matches, u_track, u_detection_second = 
matching.linear_assignment(dists, thresh=0.4) + for itracked, idet in matches: + track = r_tracked_stracks[itracked] + det = detections_second[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + for it in u_track: + #track = r_tracked_stracks[it] + track = r_tracked_stracks[it] + if not track.state == TrackState.Lost: + track.mark_lost() + lost_stracks.append(track) + + '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' + detections = [detections[i] for i in u_detection] + dists = matching.iou_distance(unconfirmed, detections) + matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) + for itracked, idet in matches: + unconfirmed[itracked].update(detections[idet], self.frame_id) + activated_starcks.append(unconfirmed[itracked]) + for it in u_unconfirmed: + track = unconfirmed[it] + track.mark_removed() + removed_stracks.append(track) + + """ Step 4: Init new stracks""" + for inew in u_detection: + track = detections[inew] + if track.score < self.det_thresh: + continue + track.activate(self.kalman_filter, self.frame_id) + activated_starcks.append(track) + """ Step 5: Update state""" + for track in self.lost_stracks: + if self.frame_id - track.end_frame > self.max_time_lost: + track.mark_removed() + removed_stracks.append(track) + + # print('Ramained match {} s'.format(t4-t3)) + + self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] + self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) + self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) + self.lost_stracks.extend(lost_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) + self.removed_stracks.extend(removed_stracks) + self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) + # get scores of lost tracks + #output_stracks = [track for track in self.tracked_stracks if track.is_activated] + ret = list() + for track in self.tracked_stracks: + if track.is_activated: + track_dict = {} + track_dict['tracking_id'] = track.track_id + track_dict['active'] = 1 + track_dict['bbox'] = track.tlbr + track_dict['score'] = track.score + ret.append(track_dict) + + return copy.deepcopy(ret) + + +def joint_stracks(tlista, tlistb): + exists = {} + res = [] + for t in tlista: + exists[t.track_id] = 1 + res.append(t) + for t in tlistb: + tid = t.track_id + if not exists.get(tid, 0): + exists[tid] = 1 + res.append(t) + return res + + +def sub_stracks(tlista, tlistb): + stracks = {} + for t in tlista: + stracks[t.track_id] = t + for t in tlistb: + tid = t.track_id + if stracks.get(tid, 0): + del stracks[tid] + return list(stracks.values()) + + +def remove_duplicate_stracks(stracksa, stracksb): + pdist = matching.iou_distance(stracksa, stracksb) + pairs = np.where(pdist < 0.15) + dupa, dupb = list(), list() + for p, q in zip(*pairs): + timep = stracksa[p].frame_id - stracksa[p].start_frame + timeq = stracksb[q].frame_id - stracksb[q].start_frame + if timep > timeq: + dupb.append(q) + else: + dupa.append(p) + resa = [t for i, t in enumerate(stracksa) if not i in dupa] + resb = [t for i, t in enumerate(stracksb) if not i in dupb] + return resa, resb + + +def remove_fp_stracks(stracksa, n_frame=10): + remain = [] + 
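# Editor's note (illustrative sketch, not part of the original file): remove_fp_stracks()
# drops a track only when all of its last n_frame scores fall below 0.45 (shorter score
# histories are always kept). It reads a per-track score history (t.score_list) that the
# STrack class in this file does not maintain, and it does not appear to be called
# anywhere in this tutorial. The filter condition, spelled out:
#
#   recent = np.array(t.score_list[-n_frame:], dtype=np.float32)
#   keep   = np.sum(recent < 0.45) < n_frame   # drop only if every recent score is low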
for t in stracksa: + score_5 = t.score_list[-n_frame:] + score_5 = np.array(score_5, dtype=np.float32) + index = score_5 < 0.45 + num = np.sum(index) + if num < n_frame: + remain.append(t) + return remain \ No newline at end of file diff --git a/tutorials/transtrack/mot_online/kalman_filter.py b/tutorials/transtrack/mot_online/kalman_filter.py new file mode 100644 index 0000000000000000000000000000000000000000..deda8a26292b81bc6512a8f6145afabde6c16d7a --- /dev/null +++ b/tutorials/transtrack/mot_online/kalman_filter.py @@ -0,0 +1,270 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np +import scipy.linalg + + +""" +Table for the 0.95 quantile of the chi-square distribution with N degrees of +freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv +function and used as Mahalanobis gating threshold. +""" +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919} + + +class KalmanFilter(object): + """ + A simple Kalman filter for tracking bounding boxes in image space. + + The 8-dimensional state space + + x, y, a, h, vx, vy, va, vh + + contains the bounding box center position (x, y), aspect ratio a, height h, + and their respective velocities. + + Object motion follows a constant velocity model. The bounding box location + (x, y, a, h) is taken as direct observation of the state space (linear + observation model). + + """ + + def __init__(self): + ndim, dt = 4, 1. + + # Create Kalman filter model matrices. + self._motion_mat = np.eye(2 * ndim, 2 * ndim) + for i in range(ndim): + self._motion_mat[i, ndim + i] = dt + self._update_mat = np.eye(ndim, 2 * ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. These weights control the amount of uncertainty in + # the model. This is a bit hacky. + self._std_weight_position = 1. / 20 + self._std_weight_velocity = 1. / 160 + + def initiate(self, measurement): + """Create track from unassociated measurement. + + Parameters + ---------- + measurement : ndarray + Bounding box coordinates (x, y, a, h) with center position (x, y), + aspect ratio a, and height h. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are initialized + to 0 mean. + + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + mean = np.r_[mean_pos, mean_vel] + + std = [ + 2 * self._std_weight_position * measurement[3], + 2 * self._std_weight_position * measurement[3], + 1e-2, + 2 * self._std_weight_position * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 1e-5, + 10 * self._std_weight_velocity * measurement[3]] + covariance = np.diag(np.square(std)) + return mean, covariance + + def predict(self, mean, covariance): + """Run Kalman filter prediction step. + + Parameters + ---------- + mean : ndarray + The 8 dimensional mean vector of the object state at the previous + time step. + covariance : ndarray + The 8x8 dimensional covariance matrix of the object state at the + previous time step. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. 
+ + """ + std_pos = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-2, + self._std_weight_position * mean[3]] + std_vel = [ + self._std_weight_velocity * mean[3], + self._std_weight_velocity * mean[3], + 1e-5, + self._std_weight_velocity * mean[3]] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + #mean = np.dot(self._motion_mat, mean) + mean = np.dot(mean, self._motion_mat.T) + covariance = np.linalg.multi_dot(( + self._motion_mat, covariance, self._motion_mat.T)) + motion_cov + + return mean, covariance + + def project(self, mean, covariance): + """Project state distribution to measurement space. + + Parameters + ---------- + mean : ndarray + The state's mean vector (8 dimensional array). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + + Returns + ------- + (ndarray, ndarray) + Returns the projected mean and covariance matrix of the given state + estimate. + + """ + std = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-1, + self._std_weight_position * mean[3]] + innovation_cov = np.diag(np.square(std)) + + mean = np.dot(self._update_mat, mean) + covariance = np.linalg.multi_dot(( + self._update_mat, covariance, self._update_mat.T)) + return mean, covariance + innovation_cov + + def multi_predict(self, mean, covariance): + """Run Kalman filter prediction step (Vectorized version). + Parameters + ---------- + mean : ndarray + The Nx8 dimensional mean matrix of the object states at the previous + time step. + covariance : ndarray + The Nx8x8 dimensional covariance matrics of the object states at the + previous time step. + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + """ + std_pos = [ + self._std_weight_position * mean[:, 3], + self._std_weight_position * mean[:, 3], + 1e-2 * np.ones_like(mean[:, 3]), + self._std_weight_position * mean[:, 3]] + std_vel = [ + self._std_weight_velocity * mean[:, 3], + self._std_weight_velocity * mean[:, 3], + 1e-5 * np.ones_like(mean[:, 3]), + self._std_weight_velocity * mean[:, 3]] + sqr = np.square(np.r_[std_pos, std_vel]).T + + motion_cov = [] + for i in range(len(mean)): + motion_cov.append(np.diag(sqr[i])) + motion_cov = np.asarray(motion_cov) + + mean = np.dot(mean, self._motion_mat.T) + left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) + covariance = np.dot(left, self._motion_mat.T) + motion_cov + + return mean, covariance + + def update(self, mean, covariance, measurement): + """Run Kalman filter correction step. + + Parameters + ---------- + mean : ndarray + The predicted state's mean vector (8 dimensional). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + measurement : ndarray + The 4 dimensional measurement vector (x, y, a, h), where (x, y) + is the center position, a the aspect ratio, and h the height of the + bounding box. + + Returns + ------- + (ndarray, ndarray) + Returns the measurement-corrected state distribution. 
+ + """ + projected_mean, projected_cov = self.project(mean, covariance) + + chol_factor, lower = scipy.linalg.cho_factor( + projected_cov, lower=True, check_finite=False) + kalman_gain = scipy.linalg.cho_solve( + (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, + check_finite=False).T + innovation = measurement - projected_mean + + new_mean = mean + np.dot(innovation, kalman_gain.T) + new_covariance = covariance - np.linalg.multi_dot(( + kalman_gain, projected_cov, kalman_gain.T)) + return new_mean, new_covariance + + def gating_distance(self, mean, covariance, measurements, + only_position=False, metric='maha'): + """Compute gating distance between state distribution and measurements. + A suitable distance threshold can be obtained from `chi2inv95`. If + `only_position` is False, the chi-square distribution has 4 degrees of + freedom, otherwise 2. + Parameters + ---------- + mean : ndarray + Mean vector over the state distribution (8 dimensional). + covariance : ndarray + Covariance of the state distribution (8x8 dimensional). + measurements : ndarray + An Nx4 dimensional matrix of N measurements, each in + format (x, y, a, h) where (x, y) is the bounding box center + position, a the aspect ratio, and h the height. + only_position : Optional[bool] + If True, distance computation is done with respect to the bounding + box center position only. + Returns + ------- + ndarray + Returns an array of length N, where the i-th element contains the + squared Mahalanobis distance between (mean, covariance) and + `measurements[i]`. + """ + mean, covariance = self.project(mean, covariance) + if only_position: + mean, covariance = mean[:2], covariance[:2, :2] + measurements = measurements[:, :2] + + d = measurements - mean + if metric == 'gaussian': + return np.sum(d * d, axis=1) + elif metric == 'maha': + cholesky_factor = np.linalg.cholesky(covariance) + z = scipy.linalg.solve_triangular( + cholesky_factor, d.T, lower=True, check_finite=False, + overwrite_b=True) + squared_maha = np.sum(z * z, axis=0) + return squared_maha + else: + raise ValueError('invalid distance metric') \ No newline at end of file diff --git a/tutorials/transtrack/mot_online/matching.py b/tutorials/transtrack/mot_online/matching.py new file mode 100644 index 0000000000000000000000000000000000000000..d21c958237a64abf185f5298a62d2bcb9270e254 --- /dev/null +++ b/tutorials/transtrack/mot_online/matching.py @@ -0,0 +1,156 @@ +import cv2 +import numpy as np +import scipy +import lap +from scipy.spatial.distance import cdist + +from cython_bbox import bbox_overlaps as bbox_ious +from mot_online import kalman_filter +import time + +def merge_matches(m1, m2, shape): + O,P,Q = shape + m1 = np.asarray(m1) + m2 = np.asarray(m2) + + M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) + M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) + + mask = M1*M2 + match = mask.nonzero() + match = list(zip(match[0], match[1])) + unmatched_O = tuple(set(range(O)) - set([i for i, j in match])) + unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match])) + + return match, unmatched_O, unmatched_Q + + +def _indices_to_matches(cost_matrix, indices, thresh): + matched_cost = cost_matrix[tuple(zip(*indices))] + matched_mask = (matched_cost <= thresh) + + matches = indices[matched_mask] + unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0])) + unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1])) + + return matches, unmatched_a, 
unmatched_b + + +def linear_assignment(cost_matrix, thresh): + if cost_matrix.size == 0: + return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) + matches, unmatched_a, unmatched_b = [], [], [] + cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) + for ix, mx in enumerate(x): + if mx >= 0: + matches.append([ix, mx]) + unmatched_a = np.where(x < 0)[0] + unmatched_b = np.where(y < 0)[0] + matches = np.asarray(matches) + return matches, unmatched_a, unmatched_b + + +def ious(atlbrs, btlbrs): + """ + Compute cost based on IoU + :type atlbrs: list[tlbr] | np.ndarray + :type atlbrs: list[tlbr] | np.ndarray + + :rtype ious np.ndarray + """ + ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) + if ious.size == 0: + return ious + + ious = bbox_ious( + np.ascontiguousarray(atlbrs, dtype=np.float), + np.ascontiguousarray(btlbrs, dtype=np.float) + ) + + return ious + + +def iou_distance(atracks, btracks): + """ + Compute cost based on IoU + :type atracks: list[STrack] + :type btracks: list[STrack] + + :rtype cost_matrix np.ndarray + """ + + if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): + atlbrs = atracks + btlbrs = btracks + else: + atlbrs = [track.tlbr for track in atracks] + btlbrs = [track.tlbr for track in btracks] + _ious = ious(atlbrs, btlbrs) + cost_matrix = 1 - _ious + + return cost_matrix + +def embedding_distance(tracks, detections, metric='cosine'): + """ + :param tracks: list[STrack] + :param detections: list[BaseTrack] + :param metric: + :return: cost_matrix np.ndarray + """ + + cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) + if cost_matrix.size == 0: + return cost_matrix + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + return cost_matrix + + +def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = kalman_filter.chi2inv95[gating_dim] + measurements = np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position) + cost_matrix[row, gating_distance > gating_threshold] = np.inf + return cost_matrix + + +def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = kalman_filter.chi2inv95[gating_dim] + measurements = np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position, metric='maha') + cost_matrix[row, gating_distance > gating_threshold] = np.inf + cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance + return cost_matrix + + +def fuse_iou(cost_matrix, tracks, detections): + if cost_matrix.size == 0: + return cost_matrix + reid_sim = 1 - cost_matrix + iou_dist = iou_distance(tracks, detections) + iou_sim = 1 - iou_dist + fuse_sim = reid_sim * (1 + iou_sim) / 2 + det_scores = np.array([det.score for det in detections]) + det_scores = 
np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0) + #fuse_sim = fuse_sim * (1 + det_scores) / 2 + fuse_cost = 1 - fuse_sim + return fuse_cost + + +def fuse_iou_add(cost_matrix, tracks, detections, weight=0.5): + if cost_matrix.size == 0: + return cost_matrix + iou_dist = iou_distance(tracks, detections) + fuse_dist = weight * iou_dist + (1 - weight) * cost_matrix + return fuse_dist \ No newline at end of file diff --git a/tutorials/transtrack/save_track.py b/tutorials/transtrack/save_track.py new file mode 100644 index 0000000000000000000000000000000000000000..7a0517c8620d2868b056b7b84c3e5c41713d06f3 --- /dev/null +++ b/tutorials/transtrack/save_track.py @@ -0,0 +1,52 @@ +""" +Copyright (c) https://github.com/xingyizhou/CenterTrack +Modified by Peize Sun, Rufeng Zhang +""" +# coding: utf-8 +import os +import json +import logging +from collections import defaultdict + + +def save_track(results, out_root, video_to_images, video_names, data_split='val'): + assert out_root is not None + out_dir = os.path.join(out_root, data_split) + if not os.path.exists(out_dir): + os.mkdir(out_dir) + + # save json. + # json_path = os.path.join(out_dir, "track_results.json") + # with open(json_path, "w") as f: + # f.write(json.dumps(results)) + # f.flush() + + # save it in standard format. + track_dir = os.path.join(out_dir, "tracks") + if not os.path.exists(track_dir): + os.mkdir(track_dir) + for video_id in video_to_images.keys(): + video_infos = video_to_images[video_id] + video_name = video_names[video_id] + file_path = os.path.join(track_dir, "{}.txt".format(video_name)) + f = open(file_path, "w") + tracks = defaultdict(list) + for video_info in video_infos: + image_id, frame_id = video_info["image_id"], video_info["frame_id"] + result = results[image_id] + for item in result: + if not ("tracking_id" in item): + raise NotImplementedError + tracking_id = item["tracking_id"] + bbox = item["bbox"] + bbox = [bbox[0], bbox[1], bbox[2], bbox[3], item['score'], item['active']] + tracks[tracking_id].append([frame_id] + bbox) + + rename_track_id = 0 + for track_id in sorted(tracks): + rename_track_id += 1 + for t in tracks[track_id]: + if t[6] > 0: + f.write("{},{},{:.2f},{:.2f},{:.2f},{:.2f},-1,-1,-1,-1\n".format( + t[0], rename_track_id, t[1], t[2], t[3] - t[1], t[4] - t[2])) + f.close() diff --git a/tutorials/transtrack/tracker.py b/tutorials/transtrack/tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..3e1f300bb35dc3b20f59dc912a35db3d8a07fd40 --- /dev/null +++ b/tutorials/transtrack/tracker.py @@ -0,0 +1,191 @@ +""" +Copyright (c) https://github.com/xingyizhou/CenterTrack +Modified by Peize Sun, Rufeng Zhang +""" +# coding: utf-8 +import torch +from scipy.optimize import linear_sum_assignment +from util import box_ops +import copy + +class Tracker(object): + def __init__(self, score_thresh, max_age=32): + self.score_thresh = score_thresh + self.low_thresh = 0.2 + self.high_thresh = score_thresh + 0.1 + self.max_age = max_age + self.id_count = 0 + self.tracks_dict = dict() + self.tracks = list() + self.unmatched_tracks = list() + self.reset_all() + + def reset_all(self): + self.id_count = 0 + self.tracks_dict = dict() + self.tracks = list() + self.unmatched_tracks = list() + + def init_track(self, results): + + scores = results["scores"] + classes = results["labels"] + bboxes = results["boxes"] # x1y1x2y2 + + ret = list() + ret_dict = dict() + for idx in range(scores.shape[0]): + if scores[idx] >= self.score_thresh: + self.id_count += 1 + obj = dict() + 
obj["score"] = float(scores[idx]) + obj["bbox"] = bboxes[idx, :].cpu().numpy().tolist() + obj["tracking_id"] = self.id_count + obj['active'] = 1 + obj['age'] = 1 + ret.append(obj) + ret_dict[idx] = obj + + self.tracks = ret + self.tracks_dict = ret_dict + return copy.deepcopy(ret) + + + def step(self, output_results): + scores = output_results["scores"] + bboxes = output_results["boxes"] # x1y1x2y2 + track_bboxes = output_results["track_boxes"] if "track_boxes" in output_results else None # x1y1x2y2 + + results = list() + results_dict = dict() + results_second = list() + + tracks = list() + + for idx in range(scores.shape[0]): + if idx in self.tracks_dict and track_bboxes is not None: + self.tracks_dict[idx]["bbox"] = track_bboxes[idx, :].cpu().numpy().tolist() + + if scores[idx] >= self.score_thresh: + obj = dict() + obj["score"] = float(scores[idx]) + obj["bbox"] = bboxes[idx, :].cpu().numpy().tolist() + results.append(obj) + results_dict[idx] = obj + elif scores[idx] >= self.low_thresh: + second_obj = dict() + second_obj["score"] = float(scores[idx]) + second_obj["bbox"] = bboxes[idx, :].cpu().numpy().tolist() + results_second.append(second_obj) + results_dict[idx] = second_obj + + tracks = [v for v in self.tracks_dict.values()] + self.unmatched_tracks + # for trackss in tracks: + # print(trackss.keys()) + N = len(results) + M = len(tracks) + + ret = list() + unmatched_tracks = [t for t in range(M)] + unmatched_dets = [d for d in range(N)] + + if N > 0 and M > 0: + det_box = torch.stack([torch.tensor(obj['bbox']) for obj in results], dim=0) # N x 4 + track_box = torch.stack([torch.tensor(obj['bbox']) for obj in tracks], dim=0) # M x 4 + cost_bbox = 1.0 - box_ops.generalized_box_iou(det_box, track_box) # N x M + + matched_indices = linear_sum_assignment(cost_bbox) + unmatched_dets = [d for d in range(N) if not (d in matched_indices[0])] + unmatched_tracks = [d for d in range(M) if not (d in matched_indices[1])] + + matches = [[],[]] + for (m0, m1) in zip(matched_indices[0], matched_indices[1]): + if cost_bbox[m0, m1] > 1.2: + unmatched_dets.append(m0) + unmatched_tracks.append(m1) + else: + matches[0].append(m0) + matches[1].append(m1) + + for (m0, m1) in zip(matches[0], matches[1]): + track = results[m0] + track['tracking_id'] = tracks[m1]['tracking_id'] + track['age'] = 1 + track['active'] = 1 + ret.append(track) + + # second association + N_second = len(results_second) + unmatched_tracks_obj = list() + for i in unmatched_tracks: + #print(tracks[i].keys()) + track = tracks[i] + if track['active'] == 1: + unmatched_tracks_obj.append(track) + M_second = len(unmatched_tracks_obj) + unmatched_tracks_second = [t for t in range(M_second)] + + if N_second > 0 and M_second > 0: + det_box_second = torch.stack([torch.tensor(obj['bbox']) for obj in results_second], dim=0) # N_second x 4 + track_box_second = torch.stack([torch.tensor(obj['bbox']) for obj in unmatched_tracks_obj], dim=0) # M_second x 4 + cost_bbox_second = 1.0 - box_ops.generalized_box_iou(det_box_second, track_box_second) # N_second x M_second + + matched_indices_second = linear_sum_assignment(cost_bbox_second) + unmatched_tracks_second = [d for d in range(M_second) if not (d in matched_indices_second[1])] + + matches_second = [[],[]] + for (m0, m1) in zip(matched_indices_second[0], matched_indices_second[1]): + if cost_bbox_second[m0, m1] > 0.8: + unmatched_tracks_second.append(m1) + else: + matches_second[0].append(m0) + matches_second[1].append(m1) + + for (m0, m1) in zip(matches_second[0], matches_second[1]): + track = 
results_second[m0] + track['tracking_id'] = unmatched_tracks_obj[m1]['tracking_id'] + track['age'] = 1 + track['active'] = 1 + ret.append(track) + + for i in unmatched_dets: + trackd = results[i] + if trackd["score"] >= self.high_thresh: + self.id_count += 1 + trackd['tracking_id'] = self.id_count + trackd['age'] = 1 + trackd['active'] = 1 + ret.append(trackd) + + # ------------------------------------------------------ # + ret_unmatched_tracks = [] + + for j in unmatched_tracks: + track = tracks[j] + if track['active'] == 0 and track['age'] < self.max_age: + track['age'] += 1 + track['active'] = 0 + ret.append(track) + ret_unmatched_tracks.append(track) + + for i in unmatched_tracks_second: + track = unmatched_tracks_obj[i] + if track['age'] < self.max_age: + track['age'] += 1 + track['active'] = 0 + ret.append(track) + ret_unmatched_tracks.append(track) + + # for i in unmatched_tracks: + # track = tracks[i] + # if track['age'] < self.max_age: + # track['age'] += 1 + # track['active'] = 0 + # ret.append(track) + # ret_unmatched_tracks.append(track) + #print(len(ret_unmatched_tracks)) + + self.tracks = ret + self.tracks_dict = {red_ind:red for red_ind, red in results_dict.items() if 'tracking_id' in red} + self.unmatched_tracks = ret_unmatched_tracks + return copy.deepcopy(ret) diff --git a/yolox/__init__.py b/yolox/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1cbc411d419c55098e7d4e24ff0f21caaaf10a1f --- /dev/null +++ b/yolox/__init__.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- + +from .utils import configure_module + +configure_module() + +__version__ = "0.1.0" diff --git a/yolox/core/__init__.py b/yolox/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c2379c704ec6320066cbb45a6b8dacca548662a0 --- /dev/null +++ b/yolox/core/__init__.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +from .launch import launch +from .trainer import Trainer diff --git a/yolox/core/launch.py b/yolox/core/launch.py new file mode 100644 index 0000000000000000000000000000000000000000..2fd5eaa765d7da2193f16f0fc463d001f6c4d5c5 --- /dev/null +++ b/yolox/core/launch.py @@ -0,0 +1,219 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Code are based on +# https://github.com/facebookresearch/detectron2/blob/master/detectron2/engine/launch.py +# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) Megvii, Inc. and its affiliates. + +from loguru import logger + +import torch +import torch.distributed as dist +import torch.multiprocessing as mp + +import yolox.utils.dist as comm +from yolox.utils import configure_nccl + +import os +import subprocess +import sys +import time + +__all__ = ["launch"] + + +def _find_free_port(): + """ + Find an available port of current machine / node. + """ + import socket + + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + # Binding to port 0 will cause the OS to find an available port for us + sock.bind(("", 0)) + port = sock.getsockname()[1] + sock.close() + # NOTE: there is still a chance the port could be taken by other processes. 
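# Editor's note (illustrative only): the socket is closed before the port is handed to
# torch.distributed, so another process can still claim it in the meantime (see the NOTE
# above). A caller that wants to be defensive could probe the port again just before
# using it, e.g. with a hypothetical helper such as:
#
#   import socket
#   def _port_is_free(port):
#       with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
#           return s.connect_ex(("127.0.0.1", port)) != 0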
+ return port + + +def launch( + main_func, + num_gpus_per_machine, + num_machines=1, + machine_rank=0, + backend="nccl", + dist_url=None, + args=(), +): + """ + Args: + main_func: a function that will be called by `main_func(*args)` + num_machines (int): the total number of machines + machine_rank (int): the rank of this machine (one per machine) + dist_url (str): url to connect to for distributed training, including protocol + e.g. "tcp://127.0.0.1:8686". + Can be set to auto to automatically select a free port on localhost + args (tuple): arguments passed to main_func + """ + world_size = num_machines * num_gpus_per_machine + if world_size > 1: + if int(os.environ.get("WORLD_SIZE", "1")) > 1: + dist_url = "{}:{}".format( + os.environ.get("MASTER_ADDR", None), + os.environ.get("MASTER_PORT", "None"), + ) + local_rank = int(os.environ.get("LOCAL_RANK", "0")) + world_size = int(os.environ.get("WORLD_SIZE", "1")) + _distributed_worker( + local_rank, + main_func, + world_size, + num_gpus_per_machine, + num_machines, + machine_rank, + backend, + dist_url, + args, + ) + exit() + launch_by_subprocess( + sys.argv, + world_size, + num_machines, + machine_rank, + num_gpus_per_machine, + dist_url, + args, + ) + else: + main_func(*args) + + +def launch_by_subprocess( + raw_argv, + world_size, + num_machines, + machine_rank, + num_gpus_per_machine, + dist_url, + args, +): + assert ( + world_size > 1 + ), "subprocess mode doesn't support single GPU, use spawn mode instead" + + if dist_url is None: + # ------------------------hack for multi-machine training -------------------- # + if num_machines > 1: + master_ip = subprocess.check_output(["hostname", "--fqdn"]).decode("utf-8") + master_ip = str(master_ip).strip() + dist_url = "tcp://{}".format(master_ip) + ip_add_file = "./" + args[1].experiment_name + "_ip_add.txt" + if machine_rank == 0: + port = _find_free_port() + with open(ip_add_file, "w") as ip_add: + ip_add.write(dist_url+'\n') + ip_add.write(str(port)) + else: + while not os.path.exists(ip_add_file): + time.sleep(0.5) + + with open(ip_add_file, "r") as ip_add: + dist_url = ip_add.readline().strip() + port = ip_add.readline() + else: + dist_url = "tcp://127.0.0.1" + port = _find_free_port() + + # set PyTorch distributed related environmental variables + current_env = os.environ.copy() + current_env["MASTER_ADDR"] = dist_url + current_env["MASTER_PORT"] = str(port) + current_env["WORLD_SIZE"] = str(world_size) + assert num_gpus_per_machine <= torch.cuda.device_count() + + if "OMP_NUM_THREADS" not in os.environ and num_gpus_per_machine > 1: + current_env["OMP_NUM_THREADS"] = str(1) + logger.info( + "\n*****************************************\n" + "Setting OMP_NUM_THREADS environment variable for each process " + "to be {} in default, to avoid your system being overloaded, " + "please further tune the variable for optimal performance in " + "your application as needed. 
\n" + "*****************************************".format( + current_env["OMP_NUM_THREADS"] + ) + ) + + processes = [] + for local_rank in range(0, num_gpus_per_machine): + # each process's rank + dist_rank = machine_rank * num_gpus_per_machine + local_rank + current_env["RANK"] = str(dist_rank) + current_env["LOCAL_RANK"] = str(local_rank) + + # spawn the processes + cmd = ["python3", *raw_argv] + + process = subprocess.Popen(cmd, env=current_env) + processes.append(process) + + for process in processes: + process.wait() + if process.returncode != 0: + raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) + + +def _distributed_worker( + local_rank, + main_func, + world_size, + num_gpus_per_machine, + num_machines, + machine_rank, + backend, + dist_url, + args, +): + assert ( + torch.cuda.is_available() + ), "cuda is not available. Please check your installation." + configure_nccl() + global_rank = machine_rank * num_gpus_per_machine + local_rank + logger.info("Rank {} initialization finished.".format(global_rank)) + try: + dist.init_process_group( + backend=backend, + init_method=dist_url, + world_size=world_size, + rank=global_rank, + ) + except Exception: + logger.error("Process group URL: {}".format(dist_url)) + raise + # synchronize is needed here to prevent a possible timeout after calling init_process_group + # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172 + comm.synchronize() + + if global_rank == 0 and os.path.exists( + "./" + args[1].experiment_name + "_ip_add.txt" + ): + os.remove("./" + args[1].experiment_name + "_ip_add.txt") + + assert num_gpus_per_machine <= torch.cuda.device_count() + torch.cuda.set_device(local_rank) + + args[1].local_rank = local_rank + args[1].num_machines = num_machines + + # Setup the local process group (which contains ranks within the same machine) + # assert comm._LOCAL_PROCESS_GROUP is None + # num_machines = world_size // num_gpus_per_machine + # for i in range(num_machines): + # ranks_on_i = list(range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine)) + # pg = dist.new_group(ranks_on_i) + # if i == machine_rank: + # comm._LOCAL_PROCESS_GROUP = pg + + main_func(*args) diff --git a/yolox/core/trainer.py b/yolox/core/trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..b0063399d8b2fa055ac2af1f9049f6b0c5cd8162 --- /dev/null +++ b/yolox/core/trainer.py @@ -0,0 +1,327 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +from loguru import logger + +import torch + +from torch.nn.parallel import DistributedDataParallel as DDP +from torch.utils.tensorboard import SummaryWriter + +from yolox.data import DataPrefetcher +from yolox.utils import ( + MeterBuffer, + ModelEMA, + all_reduce_norm, + get_model_info, + get_rank, + get_world_size, + gpu_mem_usage, + load_ckpt, + occupy_mem, + save_checkpoint, + setup_logger, + synchronize +) + +import datetime +import os +import time + + +class Trainer: + def __init__(self, exp, args): + # init function only defines some basic attr, other attrs like model, optimizer are built in + # before_train methods. 
+ self.exp = exp + self.args = args + + # training related attr + self.max_epoch = exp.max_epoch + self.amp_training = args.fp16 + self.scaler = torch.cuda.amp.GradScaler(enabled=args.fp16) + self.is_distributed = get_world_size() > 1 + self.rank = get_rank() + self.local_rank = args.local_rank + self.device = "cuda:{}".format(self.local_rank) + self.use_model_ema = exp.ema + + # data/dataloader related attr + self.data_type = torch.float16 if args.fp16 else torch.float32 + self.input_size = exp.input_size + self.best_ap = 0 + + # metric record + self.meter = MeterBuffer(window_size=exp.print_interval) + self.file_name = os.path.join(exp.output_dir, args.experiment_name) + + if self.rank == 0: + os.makedirs(self.file_name, exist_ok=True) + + setup_logger( + self.file_name, + distributed_rank=self.rank, + filename="train_log.txt", + mode="a", + ) + + def train(self): + self.before_train() + try: + self.train_in_epoch() + except Exception: + raise + finally: + self.after_train() + + def train_in_epoch(self): + for self.epoch in range(self.start_epoch, self.max_epoch): + self.before_epoch() + self.train_in_iter() + self.after_epoch() + + def train_in_iter(self): + for self.iter in range(self.max_iter): + self.before_iter() + self.train_one_iter() + self.after_iter() + + def train_one_iter(self): + iter_start_time = time.time() + + inps, targets = self.prefetcher.next() + inps = inps.to(self.data_type) + targets = targets.to(self.data_type) + targets.requires_grad = False + data_end_time = time.time() + + with torch.cuda.amp.autocast(enabled=self.amp_training): + outputs = self.model(inps, targets) + loss = outputs["total_loss"] + + self.optimizer.zero_grad() + self.scaler.scale(loss).backward() + self.scaler.step(self.optimizer) + self.scaler.update() + + if self.use_model_ema: + self.ema_model.update(self.model) + + lr = self.lr_scheduler.update_lr(self.progress_in_iter + 1) + for param_group in self.optimizer.param_groups: + param_group["lr"] = lr + + iter_end_time = time.time() + self.meter.update( + iter_time=iter_end_time - iter_start_time, + data_time=data_end_time - iter_start_time, + lr=lr, + **outputs, + ) + + def before_train(self): + logger.info("args: {}".format(self.args)) + logger.info("exp value:\n{}".format(self.exp)) + + # model related init + torch.cuda.set_device(self.local_rank) + model = self.exp.get_model() + logger.info( + "Model Summary: {}".format(get_model_info(model, self.exp.test_size)) + ) + model.to(self.device) + + # solver related init + self.optimizer = self.exp.get_optimizer(self.args.batch_size) + + # value of epoch will be set in `resume_train` + model = self.resume_train(model) + + # data related init + self.no_aug = self.start_epoch >= self.max_epoch - self.exp.no_aug_epochs + self.train_loader = self.exp.get_data_loader( + batch_size=self.args.batch_size, + is_distributed=self.is_distributed, + no_aug=self.no_aug, + ) + logger.info("init prefetcher, this might take one minute or less...") + self.prefetcher = DataPrefetcher(self.train_loader) + # max_iter means iters per epoch + self.max_iter = len(self.train_loader) + + self.lr_scheduler = self.exp.get_lr_scheduler( + self.exp.basic_lr_per_img * self.args.batch_size, self.max_iter + ) + if self.args.occupy: + occupy_mem(self.local_rank) + + if self.is_distributed: + model = DDP(model, device_ids=[self.local_rank], broadcast_buffers=False) + + if self.use_model_ema: + self.ema_model = ModelEMA(model, 0.9998) + self.ema_model.updates = self.max_iter * self.start_epoch + + self.model = model + 
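`train_one_iter` above follows the standard `torch.cuda.amp` mixed-precision recipe: forward under `autocast`, backward on the scaled loss, then `scaler.step` and `scaler.update`. A minimal standalone sketch of that recipe with a toy model (requires a CUDA device; not part of the patch):

# Minimal mixed-precision update, mirroring the structure of train_one_iter.
import torch
import torch.nn.functional as F

model = torch.nn.Linear(8, 1).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scaler = torch.cuda.amp.GradScaler(enabled=True)

inputs = torch.randn(4, 8, device="cuda")
targets = torch.randn(4, 1, device="cuda")

with torch.cuda.amp.autocast(enabled=True):
    loss = F.mse_loss(model(inputs), targets)

optimizer.zero_grad()
scaler.scale(loss).backward()   # gradients are computed from the scaled loss
scaler.step(optimizer)          # unscales gradients, skips the step on inf/NaN
scaler.update()                 # adjusts the loss scale for the next iteration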
self.model.train() + + self.evaluator = self.exp.get_evaluator( + batch_size=self.args.batch_size, is_distributed=self.is_distributed + ) + # Tensorboard logger + if self.rank == 0: + self.tblogger = SummaryWriter(self.file_name) + + logger.info("Training start...") + #logger.info("\n{}".format(model)) + + def after_train(self): + logger.info( + "Training of experiment is done and the best AP is {:.2f}".format( + self.best_ap * 100 + ) + ) + + def before_epoch(self): + logger.info("---> start train epoch{}".format(self.epoch + 1)) + + if self.epoch + 1 == self.max_epoch - self.exp.no_aug_epochs or self.no_aug: + + logger.info("--->No mosaic aug now!") + self.train_loader.close_mosaic() + logger.info("--->Add additional L1 loss now!") + if self.is_distributed: + self.model.module.head.use_l1 = True + else: + self.model.head.use_l1 = True + + self.exp.eval_interval = 1 + if not self.no_aug: + self.save_ckpt(ckpt_name="last_mosaic_epoch") + + def after_epoch(self): + if self.use_model_ema: + self.ema_model.update_attr(self.model) + + self.save_ckpt(ckpt_name="latest") + + if (self.epoch + 1) % self.exp.eval_interval == 0: + all_reduce_norm(self.model) + self.evaluate_and_save_model() + + def before_iter(self): + pass + + def after_iter(self): + """ + `after_iter` contains two parts of logic: + * log information + * reset setting of resize + """ + # log needed information + if (self.iter + 1) % self.exp.print_interval == 0: + # TODO check ETA logic + left_iters = self.max_iter * self.max_epoch - (self.progress_in_iter + 1) + eta_seconds = self.meter["iter_time"].global_avg * left_iters + eta_str = "ETA: {}".format(datetime.timedelta(seconds=int(eta_seconds))) + + progress_str = "epoch: {}/{}, iter: {}/{}".format( + self.epoch + 1, self.max_epoch, self.iter + 1, self.max_iter + ) + loss_meter = self.meter.get_filtered_meter("loss") + loss_str = ", ".join( + ["{}: {:.3f}".format(k, v.latest) for k, v in loss_meter.items()] + ) + + time_meter = self.meter.get_filtered_meter("time") + time_str = ", ".join( + ["{}: {:.3f}s".format(k, v.avg) for k, v in time_meter.items()] + ) + + logger.info( + "{}, mem: {:.0f}Mb, {}, {}, lr: {:.3e}".format( + progress_str, + gpu_mem_usage(), + time_str, + loss_str, + self.meter["lr"].latest, + ) + + (", size: {:d}, {}".format(self.input_size[0], eta_str)) + ) + self.meter.clear_meters() + + # random resizing + if self.exp.random_size is not None and (self.progress_in_iter + 1) % 10 == 0: + self.input_size = self.exp.random_resize( + self.train_loader, self.epoch, self.rank, self.is_distributed + ) + + @property + def progress_in_iter(self): + return self.epoch * self.max_iter + self.iter + + def resume_train(self, model): + if self.args.resume: + logger.info("resume training") + if self.args.ckpt is None: + ckpt_file = os.path.join(self.file_name, "latest" + "_ckpt.pth.tar") + else: + ckpt_file = self.args.ckpt + + ckpt = torch.load(ckpt_file, map_location=self.device) + # resume the model/optimizer state dict + model.load_state_dict(ckpt["model"]) + self.optimizer.load_state_dict(ckpt["optimizer"]) + start_epoch = ( + self.args.start_epoch - 1 + if self.args.start_epoch is not None + else ckpt["start_epoch"] + ) + self.start_epoch = start_epoch + logger.info( + "loaded checkpoint '{}' (epoch {})".format( + self.args.resume, self.start_epoch + ) + ) # noqa + else: + if self.args.ckpt is not None: + logger.info("loading checkpoint for fine tuning") + ckpt_file = self.args.ckpt + ckpt = torch.load(ckpt_file, map_location=self.device)["model"] + model = 
load_ckpt(model, ckpt) + self.start_epoch = 0 + + return model + + def evaluate_and_save_model(self): + evalmodel = self.ema_model.ema if self.use_model_ema else self.model + ap50_95, ap50, summary = self.exp.eval( + evalmodel, self.evaluator, self.is_distributed + ) + self.model.train() + if self.rank == 0: + self.tblogger.add_scalar("val/COCOAP50", ap50, self.epoch + 1) + self.tblogger.add_scalar("val/COCOAP50_95", ap50_95, self.epoch + 1) + logger.info("\n" + summary) + synchronize() + + #self.best_ap = max(self.best_ap, ap50_95) + self.save_ckpt("last_epoch", ap50 > self.best_ap) + self.best_ap = max(self.best_ap, ap50) + + def save_ckpt(self, ckpt_name, update_best_ckpt=False): + if self.rank == 0: + save_model = self.ema_model.ema if self.use_model_ema else self.model + logger.info("Save weights to {}".format(self.file_name)) + ckpt_state = { + "start_epoch": self.epoch + 1, + "model": save_model.state_dict(), + "optimizer": self.optimizer.state_dict(), + } + save_checkpoint( + ckpt_state, + update_best_ckpt, + self.file_name, + ckpt_name, + ) diff --git a/yolox/data/__init__.py b/yolox/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..258effe73ee63b9d4296ea1180379ec69c1cf550 --- /dev/null +++ b/yolox/data/__init__.py @@ -0,0 +1,9 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +from .data_augment import TrainTransform, ValTransform +from .data_prefetcher import DataPrefetcher +from .dataloading import DataLoader, get_yolox_datadir +from .datasets import * +from .samplers import InfiniteSampler, YoloBatchSampler diff --git a/yolox/data/data_augment.py b/yolox/data/data_augment.py new file mode 100644 index 0000000000000000000000000000000000000000..99fb30a284eeb5851e4c776aafd61b44d485196b --- /dev/null +++ b/yolox/data/data_augment.py @@ -0,0 +1,299 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. +""" +Data augmentation functionality. Passed as callable transformations to +Dataset classes. 
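`save_ckpt` and `resume_train` above round-trip a plain dict holding the model and optimizer state plus the next epoch index. A small standalone sketch of that pattern (file name assumed; not part of the patch):

# Save and restore the same keys the trainer uses: "start_epoch", "model", "optimizer".
import os
import torch

model = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

ckpt = {
    "start_epoch": 5,
    "model": model.state_dict(),
    "optimizer": optimizer.state_dict(),
}
torch.save(ckpt, "latest_ckpt.pth.tar")

# Resume: restore both state dicts and continue from the recorded epoch.
state = torch.load("latest_ckpt.pth.tar", map_location="cpu")
model.load_state_dict(state["model"])
optimizer.load_state_dict(state["optimizer"])
print("resuming from epoch", state["start_epoch"])
os.remove("latest_ckpt.pth.tar")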
+ +The data augmentation procedures were interpreted from @weiliu89's SSD paper +http://arxiv.org/abs/1512.02325 +""" + +import cv2 +import numpy as np + +import torch + +from yolox.utils import xyxy2cxcywh + +import math +import random + + +def augment_hsv(img, hgain=0.015, sgain=0.7, vgain=0.4): + r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains + hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV)) + dtype = img.dtype # uint8 + + x = np.arange(0, 256, dtype=np.int16) + lut_hue = ((x * r[0]) % 180).astype(dtype) + lut_sat = np.clip(x * r[1], 0, 255).astype(dtype) + lut_val = np.clip(x * r[2], 0, 255).astype(dtype) + + img_hsv = cv2.merge( + (cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)) + ).astype(dtype) + cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed + + +def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.2): + # box1(4,n), box2(4,n) + # Compute candidate boxes which include follwing 5 things: + # box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio + w1, h1 = box1[2] - box1[0], box1[3] - box1[1] + w2, h2 = box2[2] - box2[0], box2[3] - box2[1] + ar = np.maximum(w2 / (h2 + 1e-16), h2 / (w2 + 1e-16)) # aspect ratio + return ( + (w2 > wh_thr) + & (h2 > wh_thr) + & (w2 * h2 / (w1 * h1 + 1e-16) > area_thr) + & (ar < ar_thr) + ) # candidates + + +def random_perspective( + img, + targets=(), + degrees=10, + translate=0.1, + scale=0.1, + shear=10, + perspective=0.0, + border=(0, 0), +): + # targets = [cls, xyxy] + height = img.shape[0] + border[0] * 2 # shape(h,w,c) + width = img.shape[1] + border[1] * 2 + + # Center + C = np.eye(3) + C[0, 2] = -img.shape[1] / 2 # x translation (pixels) + C[1, 2] = -img.shape[0] / 2 # y translation (pixels) + + # Rotation and Scale + R = np.eye(3) + a = random.uniform(-degrees, degrees) + # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations + s = random.uniform(scale[0], scale[1]) + # s = 2 ** random.uniform(-scale, scale) + R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s) + + # Shear + S = np.eye(3) + S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg) + S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg) + + # Translation + T = np.eye(3) + T[0, 2] = ( + random.uniform(0.5 - translate, 0.5 + translate) * width + ) # x translation (pixels) + T[1, 2] = ( + random.uniform(0.5 - translate, 0.5 + translate) * height + ) # y translation (pixels) + + # Combined rotation matrix + M = T @ S @ R @ C # order of operations (right to left) is IMPORTANT + + ########################### + # For Aug out of Mosaic + # s = 1. 
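`random_perspective` composes the centering, rotation/scale, shear and translation matrices right to left (`M = T @ S @ R @ C`) and later applies `M` to box corners in homogeneous coordinates. A small numpy check with fixed, made-up parameters (not part of the patch):

# Toy check of the matrix composition used in random_perspective.
import math
import numpy as np
import cv2

h, w = 480, 640
C = np.eye(3); C[0, 2] = -w / 2; C[1, 2] = -h / 2                 # move origin to center
R = np.eye(3); R[:2] = cv2.getRotationMatrix2D((0, 0), 10, 1.2)   # 10 deg rotation, scale 1.2
S = np.eye(3); S[0, 1] = math.tan(math.radians(5))                # 5 deg x-shear
T = np.eye(3); T[0, 2] = 0.5 * w; T[1, 2] = 0.5 * h               # translate back into the image
M = T @ S @ R @ C                                                 # applied right to left

corner = np.array([100.0, 200.0, 1.0])                            # homogeneous point
print((M @ corner)[:2])                                           # warped (x, y)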
+ # M = np.eye(3) + ########################### + + if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed + if perspective: + img = cv2.warpPerspective( + img, M, dsize=(width, height), borderValue=(114, 114, 114) + ) + else: # affine + img = cv2.warpAffine( + img, M[:2], dsize=(width, height), borderValue=(114, 114, 114) + ) + + # Transform label coordinates + n = len(targets) + if n: + # warp points + xy = np.ones((n * 4, 3)) + xy[:, :2] = targets[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape( + n * 4, 2 + ) # x1y1, x2y2, x1y2, x2y1 + xy = xy @ M.T # transform + if perspective: + xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale + else: # affine + xy = xy[:, :2].reshape(n, 8) + + # create new boxes + x = xy[:, [0, 2, 4, 6]] + y = xy[:, [1, 3, 5, 7]] + xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T + + # clip boxes + #xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width) + #xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height) + + # filter candidates + i = box_candidates(box1=targets[:, :4].T * s, box2=xy.T) + targets = targets[i] + targets[:, :4] = xy[i] + + targets = targets[targets[:, 0] < width] + targets = targets[targets[:, 2] > 0] + targets = targets[targets[:, 1] < height] + targets = targets[targets[:, 3] > 0] + + return img, targets + + +def _distort(image): + def _convert(image, alpha=1, beta=0): + tmp = image.astype(float) * alpha + beta + tmp[tmp < 0] = 0 + tmp[tmp > 255] = 255 + image[:] = tmp + + image = image.copy() + + if random.randrange(2): + _convert(image, beta=random.uniform(-32, 32)) + + if random.randrange(2): + _convert(image, alpha=random.uniform(0.5, 1.5)) + + image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) + + if random.randrange(2): + tmp = image[:, :, 0].astype(int) + random.randint(-18, 18) + tmp %= 180 + image[:, :, 0] = tmp + + if random.randrange(2): + _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5)) + + image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) + + return image + + +def _mirror(image, boxes): + _, width, _ = image.shape + if random.randrange(2): + image = image[:, ::-1] + boxes = boxes.copy() + boxes[:, 0::2] = width - boxes[:, 2::-2] + return image, boxes + + +def preproc(image, input_size, mean, std, swap=(2, 0, 1)): + if len(image.shape) == 3: + padded_img = np.ones((input_size[0], input_size[1], 3)) * 114.0 + else: + padded_img = np.ones(input_size) * 114.0 + img = np.array(image) + r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * r), int(img.shape[0] * r)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.float32) + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img + + padded_img = padded_img[:, :, ::-1] + padded_img /= 255.0 + if mean is not None: + padded_img -= mean + if std is not None: + padded_img /= std + padded_img = padded_img.transpose(swap) + padded_img = np.ascontiguousarray(padded_img, dtype=np.float32) + return padded_img, r + + +class TrainTransform: + def __init__(self, p=0.5, rgb_means=None, std=None, max_labels=100): + self.means = rgb_means + self.std = std + self.p = p + self.max_labels = max_labels + + def __call__(self, image, targets, input_dim): + boxes = targets[:, :4].copy() + labels = targets[:, 4].copy() + ids = targets[:, 5].copy() + if len(boxes) == 0: + targets = np.zeros((self.max_labels, 6), dtype=np.float32) + image, r_o = preproc(image, input_dim, self.means, self.std) + image = np.ascontiguousarray(image, dtype=np.float32) + return image, targets + + image_o = 
image.copy() + targets_o = targets.copy() + height_o, width_o, _ = image_o.shape + boxes_o = targets_o[:, :4] + labels_o = targets_o[:, 4] + ids_o = targets_o[:, 5] + # bbox_o: [xyxy] to [c_x,c_y,w,h] + boxes_o = xyxy2cxcywh(boxes_o) + + image_t = _distort(image) + image_t, boxes = _mirror(image_t, boxes) + height, width, _ = image_t.shape + image_t, r_ = preproc(image_t, input_dim, self.means, self.std) + # boxes [xyxy] 2 [cx,cy,w,h] + boxes = xyxy2cxcywh(boxes) + boxes *= r_ + + mask_b = np.minimum(boxes[:, 2], boxes[:, 3]) > 1 + boxes_t = boxes[mask_b] + labels_t = labels[mask_b] + ids_t = ids[mask_b] + + if len(boxes_t) == 0: + image_t, r_o = preproc(image_o, input_dim, self.means, self.std) + boxes_o *= r_o + boxes_t = boxes_o + labels_t = labels_o + ids_t = ids_o + + labels_t = np.expand_dims(labels_t, 1) + ids_t = np.expand_dims(ids_t, 1) + + targets_t = np.hstack((labels_t, boxes_t, ids_t)) + padded_labels = np.zeros((self.max_labels, 6)) + padded_labels[range(len(targets_t))[: self.max_labels]] = targets_t[ + : self.max_labels + ] + padded_labels = np.ascontiguousarray(padded_labels, dtype=np.float32) + image_t = np.ascontiguousarray(image_t, dtype=np.float32) + return image_t, padded_labels + + +class ValTransform: + """ + Defines the transformations that should be applied to test PIL image + for input into the network + + dimension -> tensorize -> color adj + + Arguments: + resize (int): input dimension to SSD + rgb_means ((int,int,int)): average RGB of the dataset + (104,117,123) + swap ((int,int,int)): final order of channels + + Returns: + transform (transform) : callable transform to be applied to test/val + data + """ + + def __init__(self, rgb_means=None, std=None, swap=(2, 0, 1)): + self.means = rgb_means + self.swap = swap + self.std = std + + # assume input is cv2 img for now + def __call__(self, img, res, input_size): + img, _ = preproc(img, input_size, self.means, self.std, self.swap) + return img, np.zeros((1, 5)) diff --git a/yolox/data/data_prefetcher.py b/yolox/data/data_prefetcher.py new file mode 100644 index 0000000000000000000000000000000000000000..0f5d2b5eeec2b552f381239a16117a5c98255041 --- /dev/null +++ b/yolox/data/data_prefetcher.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import torch +import torch.distributed as dist + +from yolox.utils import synchronize + +import random + + +class DataPrefetcher: + """ + DataPrefetcher is inspired by code of following file: + https://github.com/NVIDIA/apex/blob/master/examples/imagenet/main_amp.py + It could speedup your pytorch dataloader. For more information, please check + https://github.com/NVIDIA/apex/issues/304#issuecomment-493562789. 
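`TrainTransform.__call__` above pads the per-image targets to a fixed `max_labels` rows so that images with different object counts can be collated into one batch. A tiny numpy illustration of that padding (values made up; not part of the patch):

# Pad a variable-length (n, 6) target array [class, cx, cy, w, h, track_id]
# to a fixed number of rows; unused rows stay all-zero.
import numpy as np

max_labels = 5
targets = np.array([[0, 50.0, 60.0, 20.0, 40.0, 7],
                    [0, 90.0, 30.0, 10.0, 15.0, 8]], dtype=np.float32)

padded = np.zeros((max_labels, 6), dtype=np.float32)
padded[: min(len(targets), max_labels)] = targets[:max_labels]
print(padded.shape)   # (5, 6)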
+ """ + + def __init__(self, loader): + self.loader = iter(loader) + self.stream = torch.cuda.Stream() + self.input_cuda = self._input_cuda_for_image + self.record_stream = DataPrefetcher._record_stream_for_image + self.preload() + + def preload(self): + try: + self.next_input, self.next_target, _, _ = next(self.loader) + except StopIteration: + self.next_input = None + self.next_target = None + return + + with torch.cuda.stream(self.stream): + self.input_cuda() + self.next_target = self.next_target.cuda(non_blocking=True) + + def next(self): + torch.cuda.current_stream().wait_stream(self.stream) + input = self.next_input + target = self.next_target + if input is not None: + self.record_stream(input) + if target is not None: + target.record_stream(torch.cuda.current_stream()) + self.preload() + return input, target + + def _input_cuda_for_image(self): + self.next_input = self.next_input.cuda(non_blocking=True) + + @staticmethod + def _record_stream_for_image(input): + input.record_stream(torch.cuda.current_stream()) + + +def random_resize(data_loader, exp, epoch, rank, is_distributed): + tensor = torch.LongTensor(1).cuda() + if is_distributed: + synchronize() + + if rank == 0: + if epoch > exp.max_epoch - 10: + size = exp.input_size + else: + size = random.randint(*exp.random_size) + size = int(32 * size) + tensor.fill_(size) + + if is_distributed: + synchronize() + dist.broadcast(tensor, 0) + + input_size = data_loader.change_input_dim(multiple=tensor.item(), random_range=None) + return input_size diff --git a/yolox/data/dataloading.py b/yolox/data/dataloading.py new file mode 100644 index 0000000000000000000000000000000000000000..2756f2fc8b77ea6d02f2b0dd03ff99de08efba95 --- /dev/null +++ b/yolox/data/dataloading.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import torch +from torch.utils.data.dataloader import DataLoader as torchDataLoader +from torch.utils.data.dataloader import default_collate + +import os +import random + +from .samplers import YoloBatchSampler + + +def get_yolox_datadir(): + """ + get dataset dir of YOLOX. If environment variable named `YOLOX_DATADIR` is set, + this function will return value of the environment variable. Otherwise, use data + """ + yolox_datadir = os.getenv("YOLOX_DATADIR", None) + if yolox_datadir is None: + import yolox + + yolox_path = os.path.dirname(os.path.dirname(yolox.__file__)) + yolox_datadir = os.path.join(yolox_path, "datasets") + return yolox_datadir + + +class DataLoader(torchDataLoader): + """ + Lightnet dataloader that enables on the fly resizing of the images. + See :class:`torch.utils.data.DataLoader` for more information on the arguments. + Check more on the following website: + https://gitlab.com/EAVISE/lightnet/-/blob/master/lightnet/data/_dataloading.py + + Note: + This dataloader only works with :class:`lightnet.data.Dataset` based datasets. + + Example: + >>> class CustomSet(ln.data.Dataset): + ... def __len__(self): + ... return 4 + ... @ln.data.Dataset.resize_getitem + ... def __getitem__(self, index): + ... # Should return (image, anno) but here we return (input_dim,) + ... return (self.input_dim,) + >>> dl = ln.data.DataLoader( + ... CustomSet((200,200)), + ... batch_size = 2, + ... collate_fn = ln.data.list_collate # We want the data to be grouped as a list + ... ) + >>> dl.dataset.input_dim # Default input_dim + (200, 200) + >>> for d in dl: + ... 
d + [[(200, 200), (200, 200)]] + [[(200, 200), (200, 200)]] + >>> dl.change_input_dim(320, random_range=None) + (320, 320) + >>> for d in dl: + ... d + [[(320, 320), (320, 320)]] + [[(320, 320), (320, 320)]] + >>> dl.change_input_dim((480, 320), random_range=None) + (480, 320) + >>> for d in dl: + ... d + [[(480, 320), (480, 320)]] + [[(480, 320), (480, 320)]] + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.__initialized = False + shuffle = False + batch_sampler = None + if len(args) > 5: + shuffle = args[2] + sampler = args[3] + batch_sampler = args[4] + elif len(args) > 4: + shuffle = args[2] + sampler = args[3] + if "batch_sampler" in kwargs: + batch_sampler = kwargs["batch_sampler"] + elif len(args) > 3: + shuffle = args[2] + if "sampler" in kwargs: + sampler = kwargs["sampler"] + if "batch_sampler" in kwargs: + batch_sampler = kwargs["batch_sampler"] + else: + if "shuffle" in kwargs: + shuffle = kwargs["shuffle"] + if "sampler" in kwargs: + sampler = kwargs["sampler"] + if "batch_sampler" in kwargs: + batch_sampler = kwargs["batch_sampler"] + + # Use custom BatchSampler + if batch_sampler is None: + if sampler is None: + if shuffle: + sampler = torch.utils.data.sampler.RandomSampler(self.dataset) + # sampler = torch.utils.data.DistributedSampler(self.dataset) + else: + sampler = torch.utils.data.sampler.SequentialSampler(self.dataset) + batch_sampler = YoloBatchSampler( + sampler, + self.batch_size, + self.drop_last, + input_dimension=self.dataset.input_dim, + ) + # batch_sampler = IterationBasedBatchSampler(batch_sampler, num_iterations = + + self.batch_sampler = batch_sampler + + self.__initialized = True + + def close_mosaic(self): + self.batch_sampler.mosaic = False + + def change_input_dim(self, multiple=32, random_range=(10, 19)): + """This function will compute a new size and update it on the next mini_batch. + + Args: + multiple (int or tuple, optional): values to multiply the randomly generated range by. + Default **32** + random_range (tuple, optional): This (min, max) tuple sets the range + for the randomisation; Default **(10, 19)** + + Return: + tuple: width, height tuple with new dimension + + Note: + The new size is generated as follows: |br| + First we compute a random integer inside ``[random_range]``. + We then multiply that number with the ``multiple`` argument, + which gives our final new input size. |br| + If ``multiple`` is an integer we generate a square size. If you give a tuple + of **(width, height)**, the size is computed + as :math:`rng * multiple[0], rng * multiple[1]`. + + Note: + You can set the ``random_range`` argument to **None** to set + an exact size of multiply. |br| + See the example above for how this works. + """ + if random_range is None: + size = 1 + else: + size = random.randint(*random_range) + + if isinstance(multiple, int): + size = (size * multiple, size * multiple) + else: + size = (size * multiple[0], size * multiple[1]) + + self.batch_sampler.new_input_dim = size + + return size + + +def list_collate(batch): + """ + Function that collates lists or tuples together into one list (of lists/tuples). + Use this as the collate function in a Dataloader, if you want to have a list of + items as an output, as opposed to tensors (eg. Brambox.boxes). 
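`change_input_dim` draws an integer from `random_range` and multiplies it by `multiple` (the stride, typically 32), so the default range (10, 19) yields square inputs between 320 and 608 pixels. A short sketch of that computation (not part of the patch):

# The size arithmetic behind multi-scale training: random factor times stride.
import random

def next_input_size(multiple=32, random_range=(10, 19)):
    size = 1 if random_range is None else random.randint(*random_range)
    if isinstance(multiple, int):
        return (size * multiple, size * multiple)
    return (size * multiple[0], size * multiple[1])

print(next_input_size())                    # somewhere between (320, 320) and (608, 608)
print(next_input_size((608, 1088), None))   # exact (608, 1088) when the range is disabled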
+ """ + items = list(zip(*batch)) + + for i in range(len(items)): + if isinstance(items[i][0], (list, tuple)): + items[i] = list(items[i]) + else: + items[i] = default_collate(items[i]) + + return items diff --git a/yolox/data/datasets/__init__.py b/yolox/data/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..61065a88874f8da6a92542801114ca9a5afe8eac --- /dev/null +++ b/yolox/data/datasets/__init__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +from .datasets_wrapper import ConcatDataset, Dataset, MixConcatDataset +from .mosaicdetection import MosaicDetection +from .mot import MOTDataset diff --git a/yolox/data/datasets/datasets_wrapper.py b/yolox/data/datasets/datasets_wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..a262e6a6755e7fa69132d3f59fbe20b1bb17a6cf --- /dev/null +++ b/yolox/data/datasets/datasets_wrapper.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +from torch.utils.data.dataset import ConcatDataset as torchConcatDataset +from torch.utils.data.dataset import Dataset as torchDataset + +import bisect +from functools import wraps + + +class ConcatDataset(torchConcatDataset): + def __init__(self, datasets): + super(ConcatDataset, self).__init__(datasets) + if hasattr(self.datasets[0], "input_dim"): + self._input_dim = self.datasets[0].input_dim + self.input_dim = self.datasets[0].input_dim + + def pull_item(self, idx): + if idx < 0: + if -idx > len(self): + raise ValueError( + "absolute value of index should not exceed dataset length" + ) + idx = len(self) + idx + dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) + if dataset_idx == 0: + sample_idx = idx + else: + sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] + return self.datasets[dataset_idx].pull_item(sample_idx) + + +class MixConcatDataset(torchConcatDataset): + def __init__(self, datasets): + super(MixConcatDataset, self).__init__(datasets) + if hasattr(self.datasets[0], "input_dim"): + self._input_dim = self.datasets[0].input_dim + self.input_dim = self.datasets[0].input_dim + + def __getitem__(self, index): + + if not isinstance(index, int): + idx = index[1] + if idx < 0: + if -idx > len(self): + raise ValueError( + "absolute value of index should not exceed dataset length" + ) + idx = len(self) + idx + dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) + if dataset_idx == 0: + sample_idx = idx + else: + sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] + if not isinstance(index, int): + index = (index[0], sample_idx, index[2]) + + return self.datasets[dataset_idx][index] + + +class Dataset(torchDataset): + """ This class is a subclass of the base :class:`torch.utils.data.Dataset`, + that enables on the fly resizing of the ``input_dim``. + + Args: + input_dimension (tuple): (width,height) tuple with default dimensions of the network + """ + + def __init__(self, input_dimension, mosaic=True): + super().__init__() + self.__input_dim = input_dimension[:2] + self.enable_mosaic = mosaic + + @property + def input_dim(self): + """ + Dimension that can be used by transforms to set the correct image size, etc. + This allows transforms to have a single source of truth + for the input dimension of the network. 
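`ConcatDataset.pull_item` above routes a flat index to the right sub-dataset by bisecting the cumulative sizes. A standalone illustration with made-up dataset lengths (not part of the patch):

# Map a flat index to (dataset index, local sample index) via cumulative sizes.
import bisect
import itertools

sizes = [3, 5, 2]                                # lengths of three datasets
cumulative = list(itertools.accumulate(sizes))   # [3, 8, 10]

def locate(idx):
    dataset_idx = bisect.bisect_right(cumulative, idx)
    sample_idx = idx if dataset_idx == 0 else idx - cumulative[dataset_idx - 1]
    return dataset_idx, sample_idx

print(locate(0))   # (0, 0) -> first dataset
print(locate(4))   # (1, 1) -> second dataset
print(locate(9))   # (2, 1) -> third dataset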
+ + Return: + list: Tuple containing the current width,height + """ + if hasattr(self, "_input_dim"): + return self._input_dim + return self.__input_dim + + @staticmethod + def resize_getitem(getitem_fn): + """ + Decorator method that needs to be used around the ``__getitem__`` method. |br| + This decorator enables the on the fly resizing of + the ``input_dim`` with our :class:`~lightnet.data.DataLoader` class. + + Example: + >>> class CustomSet(ln.data.Dataset): + ... def __len__(self): + ... return 10 + ... @ln.data.Dataset.resize_getitem + ... def __getitem__(self, index): + ... # Should return (image, anno) but here we return input_dim + ... return self.input_dim + >>> data = CustomSet((200,200)) + >>> data[0] + (200, 200) + >>> data[(480,320), 0] + (480, 320) + """ + + @wraps(getitem_fn) + def wrapper(self, index): + if not isinstance(index, int): + has_dim = True + self._input_dim = index[0] + self.enable_mosaic = index[2] + index = index[1] + else: + has_dim = False + + ret_val = getitem_fn(self, index) + + if has_dim: + del self._input_dim + + return ret_val + + return wrapper diff --git a/yolox/data/datasets/mosaicdetection.py b/yolox/data/datasets/mosaicdetection.py new file mode 100644 index 0000000000000000000000000000000000000000..d2bf39f7a4ebe0cdea59e01ad20dc0695d2d51fa --- /dev/null +++ b/yolox/data/datasets/mosaicdetection.py @@ -0,0 +1,242 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import cv2 +import numpy as np + +from yolox.utils import adjust_box_anns + +import random + +from ..data_augment import box_candidates, random_perspective, augment_hsv +from .datasets_wrapper import Dataset + + +def get_mosaic_coordinate(mosaic_image, mosaic_index, xc, yc, w, h, input_h, input_w): + # TODO update doc + # index0 to top left part of image + if mosaic_index == 0: + x1, y1, x2, y2 = max(xc - w, 0), max(yc - h, 0), xc, yc + small_coord = w - (x2 - x1), h - (y2 - y1), w, h + # index1 to top right part of image + elif mosaic_index == 1: + x1, y1, x2, y2 = xc, max(yc - h, 0), min(xc + w, input_w * 2), yc + small_coord = 0, h - (y2 - y1), min(w, x2 - x1), h + # index2 to bottom left part of image + elif mosaic_index == 2: + x1, y1, x2, y2 = max(xc - w, 0), yc, xc, min(input_h * 2, yc + h) + small_coord = w - (x2 - x1), 0, w, min(y2 - y1, h) + # index2 to bottom right part of image + elif mosaic_index == 3: + x1, y1, x2, y2 = xc, yc, min(xc + w, input_w * 2), min(input_h * 2, yc + h) # noqa + small_coord = 0, 0, min(w, x2 - x1), min(y2 - y1, h) + return (x1, y1, x2, y2), small_coord + + +class MosaicDetection(Dataset): + """Detection dataset wrapper that performs mixup for normal dataset.""" + + def __init__( + self, dataset, img_size, mosaic=True, preproc=None, + degrees=10.0, translate=0.1, scale=(0.5, 1.5), mscale=(0.5, 1.5), + shear=2.0, perspective=0.0, enable_mixup=True, *args + ): + """ + + Args: + dataset(Dataset) : Pytorch dataset object. + img_size (tuple): + mosaic (bool): enable mosaic augmentation or not. + preproc (func): + degrees (float): + translate (float): + scale (tuple): + mscale (tuple): + shear (float): + perspective (float): + enable_mixup (bool): + *args(tuple) : Additional arguments for mixup random sampler. 
+ """ + super().__init__(img_size, mosaic=mosaic) + self._dataset = dataset + self.preproc = preproc + self.degrees = degrees + self.translate = translate + self.scale = scale + self.shear = shear + self.perspective = perspective + self.mixup_scale = mscale + self.enable_mosaic = mosaic + self.enable_mixup = enable_mixup + + def __len__(self): + return len(self._dataset) + + @Dataset.resize_getitem + def __getitem__(self, idx): + if self.enable_mosaic: + mosaic_labels = [] + input_dim = self._dataset.input_dim + input_h, input_w = input_dim[0], input_dim[1] + + # yc, xc = s, s # mosaic center x, y + yc = int(random.uniform(0.5 * input_h, 1.5 * input_h)) + xc = int(random.uniform(0.5 * input_w, 1.5 * input_w)) + + # 3 additional image indices + indices = [idx] + [random.randint(0, len(self._dataset) - 1) for _ in range(3)] + + for i_mosaic, index in enumerate(indices): + img, _labels, _, _ = self._dataset.pull_item(index) + h0, w0 = img.shape[:2] # orig hw + scale = min(1. * input_h / h0, 1. * input_w / w0) + img = cv2.resize( + img, (int(w0 * scale), int(h0 * scale)), interpolation=cv2.INTER_LINEAR + ) + # generate output mosaic image + (h, w, c) = img.shape[:3] + if i_mosaic == 0: + mosaic_img = np.full((input_h * 2, input_w * 2, c), 114, dtype=np.uint8) + + # suffix l means large image, while s means small image in mosaic aug. + (l_x1, l_y1, l_x2, l_y2), (s_x1, s_y1, s_x2, s_y2) = get_mosaic_coordinate( + mosaic_img, i_mosaic, xc, yc, w, h, input_h, input_w + ) + + mosaic_img[l_y1:l_y2, l_x1:l_x2] = img[s_y1:s_y2, s_x1:s_x2] + padw, padh = l_x1 - s_x1, l_y1 - s_y1 + + labels = _labels.copy() + # Normalized xywh to pixel xyxy format + if _labels.size > 0: + labels[:, 0] = scale * _labels[:, 0] + padw + labels[:, 1] = scale * _labels[:, 1] + padh + labels[:, 2] = scale * _labels[:, 2] + padw + labels[:, 3] = scale * _labels[:, 3] + padh + mosaic_labels.append(labels) + + if len(mosaic_labels): + mosaic_labels = np.concatenate(mosaic_labels, 0) + ''' + np.clip(mosaic_labels[:, 0], 0, 2 * input_w, out=mosaic_labels[:, 0]) + np.clip(mosaic_labels[:, 1], 0, 2 * input_h, out=mosaic_labels[:, 1]) + np.clip(mosaic_labels[:, 2], 0, 2 * input_w, out=mosaic_labels[:, 2]) + np.clip(mosaic_labels[:, 3], 0, 2 * input_h, out=mosaic_labels[:, 3]) + ''' + + mosaic_labels = mosaic_labels[mosaic_labels[:, 0] < 2 * input_w] + mosaic_labels = mosaic_labels[mosaic_labels[:, 2] > 0] + mosaic_labels = mosaic_labels[mosaic_labels[:, 1] < 2 * input_h] + mosaic_labels = mosaic_labels[mosaic_labels[:, 3] > 0] + + #augment_hsv(mosaic_img) + mosaic_img, mosaic_labels = random_perspective( + mosaic_img, + mosaic_labels, + degrees=self.degrees, + translate=self.translate, + scale=self.scale, + shear=self.shear, + perspective=self.perspective, + border=[-input_h // 2, -input_w // 2], + ) # border to remove + + # ----------------------------------------------------------------- + # CopyPaste: https://arxiv.org/abs/2012.07177 + # ----------------------------------------------------------------- + if self.enable_mixup and not len(mosaic_labels) == 0: + mosaic_img, mosaic_labels = self.mixup(mosaic_img, mosaic_labels, self.input_dim) + + mix_img, padded_labels = self.preproc(mosaic_img, mosaic_labels, self.input_dim) + img_info = (mix_img.shape[1], mix_img.shape[0]) + + return mix_img, padded_labels, img_info, np.array([idx]) + + else: + self._dataset._input_dim = self.input_dim + img, label, img_info, id_ = self._dataset.pull_item(idx) + img, label = self.preproc(img, label, self.input_dim) + return img, label, img_info, 
id_ + + def mixup(self, origin_img, origin_labels, input_dim): + jit_factor = random.uniform(*self.mixup_scale) + FLIP = random.uniform(0, 1) > 0.5 + cp_labels = [] + while len(cp_labels) == 0: + cp_index = random.randint(0, self.__len__() - 1) + cp_labels = self._dataset.load_anno(cp_index) + img, cp_labels, _, _ = self._dataset.pull_item(cp_index) + + if len(img.shape) == 3: + cp_img = np.ones((input_dim[0], input_dim[1], 3)) * 114.0 + else: + cp_img = np.ones(input_dim) * 114.0 + cp_scale_ratio = min(input_dim[0] / img.shape[0], input_dim[1] / img.shape[1]) + resized_img = cv2.resize( + img, + (int(img.shape[1] * cp_scale_ratio), int(img.shape[0] * cp_scale_ratio)), + interpolation=cv2.INTER_LINEAR, + ).astype(np.float32) + cp_img[ + : int(img.shape[0] * cp_scale_ratio), : int(img.shape[1] * cp_scale_ratio) + ] = resized_img + cp_img = cv2.resize( + cp_img, + (int(cp_img.shape[1] * jit_factor), int(cp_img.shape[0] * jit_factor)), + ) + cp_scale_ratio *= jit_factor + if FLIP: + cp_img = cp_img[:, ::-1, :] + + origin_h, origin_w = cp_img.shape[:2] + target_h, target_w = origin_img.shape[:2] + padded_img = np.zeros( + (max(origin_h, target_h), max(origin_w, target_w), 3) + ).astype(np.uint8) + padded_img[:origin_h, :origin_w] = cp_img + + x_offset, y_offset = 0, 0 + if padded_img.shape[0] > target_h: + y_offset = random.randint(0, padded_img.shape[0] - target_h - 1) + if padded_img.shape[1] > target_w: + x_offset = random.randint(0, padded_img.shape[1] - target_w - 1) + padded_cropped_img = padded_img[ + y_offset: y_offset + target_h, x_offset: x_offset + target_w + ] + + cp_bboxes_origin_np = adjust_box_anns( + cp_labels[:, :4].copy(), cp_scale_ratio, 0, 0, origin_w, origin_h + ) + if FLIP: + cp_bboxes_origin_np[:, 0::2] = ( + origin_w - cp_bboxes_origin_np[:, 0::2][:, ::-1] + ) + cp_bboxes_transformed_np = cp_bboxes_origin_np.copy() + ''' + cp_bboxes_transformed_np[:, 0::2] = np.clip( + cp_bboxes_transformed_np[:, 0::2] - x_offset, 0, target_w + ) + cp_bboxes_transformed_np[:, 1::2] = np.clip( + cp_bboxes_transformed_np[:, 1::2] - y_offset, 0, target_h + ) + ''' + cp_bboxes_transformed_np[:, 0::2] = cp_bboxes_transformed_np[:, 0::2] - x_offset + cp_bboxes_transformed_np[:, 1::2] = cp_bboxes_transformed_np[:, 1::2] - y_offset + keep_list = box_candidates(cp_bboxes_origin_np.T, cp_bboxes_transformed_np.T, 5) + + if keep_list.sum() >= 1.0: + cls_labels = cp_labels[keep_list, 4:5].copy() + id_labels = cp_labels[keep_list, 5:6].copy() + box_labels = cp_bboxes_transformed_np[keep_list] + labels = np.hstack((box_labels, cls_labels, id_labels)) + # remove outside bbox + labels = labels[labels[:, 0] < target_w] + labels = labels[labels[:, 2] > 0] + labels = labels[labels[:, 1] < target_h] + labels = labels[labels[:, 3] > 0] + origin_labels = np.vstack((origin_labels, labels)) + origin_img = origin_img.astype(np.float32) + origin_img = 0.5 * origin_img + 0.5 * padded_cropped_img.astype(np.float32) + + return origin_img, origin_labels diff --git a/yolox/data/datasets/mot.py b/yolox/data/datasets/mot.py new file mode 100644 index 0000000000000000000000000000000000000000..d52febcbbe816bdd3d1e07f2d042e115ae330442 --- /dev/null +++ b/yolox/data/datasets/mot.py @@ -0,0 +1,132 @@ +import cv2 +import numpy as np +from pycocotools.coco import COCO + +import os + +from ..dataloading import get_yolox_datadir +from .datasets_wrapper import Dataset + + +class MOTDataset(Dataset): + """ + COCO dataset class. 
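`mixup` above pastes a second, randomly scaled and possibly flipped sample onto a canvas and blends it 50/50 with the mosaic image before merging the surviving labels. A stripped-down numpy sketch of just the pixel blend (random images, no label handling; not part of the patch):

# Bare-bones version of the 0.5/0.5 blend done at the end of mixup().
import numpy as np

origin_img = np.random.randint(0, 255, (416, 416, 3)).astype(np.float32)
pasted_img = np.random.randint(0, 255, (416, 416, 3)).astype(np.float32)

blended = 0.5 * origin_img + 0.5 * pasted_img
print(blended.shape, blended.dtype)   # (416, 416, 3) float32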
+ """ + + def __init__( + self, + data_dir=None, + json_file="train_half.json", + name="train", + img_size=(608, 1088), + preproc=None, + ): + """ + COCO dataset initialization. Annotation data are read into memory by COCO API. + Args: + data_dir (str): dataset root directory + json_file (str): COCO json file name + name (str): COCO data name (e.g. 'train2017' or 'val2017') + img_size (int): target image size after pre-processing + preproc: data augmentation strategy + """ + super().__init__(img_size) + if data_dir is None: + data_dir = os.path.join(get_yolox_datadir(), "mot") + self.data_dir = data_dir + self.json_file = json_file + + self.coco = COCO(os.path.join(self.data_dir, "annotations", self.json_file)) + self.ids = self.coco.getImgIds() + self.class_ids = sorted(self.coco.getCatIds()) + cats = self.coco.loadCats(self.coco.getCatIds()) + self._classes = tuple([c["name"] for c in cats]) + self.annotations = self._load_coco_annotations() + self.name = name + self.img_size = img_size + self.preproc = preproc + + def __len__(self): + return len(self.ids) + + def _load_coco_annotations(self): + return [self.load_anno_from_ids(_ids) for _ids in self.ids] + + def load_anno_from_ids(self, id_): + im_ann = self.coco.loadImgs(id_)[0] + width = im_ann["width"] + height = im_ann["height"] + frame_id = im_ann["frame_id"] + video_id = im_ann["video_id"] + anno_ids = self.coco.getAnnIds(imgIds=[int(id_)], iscrowd=False) + annotations = self.coco.loadAnns(anno_ids) + objs = [] + for obj in annotations: + x1 = obj["bbox"][0] + y1 = obj["bbox"][1] + x2 = x1 + obj["bbox"][2] + y2 = y1 + obj["bbox"][3] + if obj["area"] > 0 and x2 >= x1 and y2 >= y1: + obj["clean_bbox"] = [x1, y1, x2, y2] + objs.append(obj) + + num_objs = len(objs) + + res = np.zeros((num_objs, 6)) + + for ix, obj in enumerate(objs): + cls = self.class_ids.index(obj["category_id"]) + res[ix, 0:4] = obj["clean_bbox"] + res[ix, 4] = cls + res[ix, 5] = obj["track_id"] + + file_name = im_ann["file_name"] if "file_name" in im_ann else "{:012}".format(id_) + ".jpg" + img_info = (height, width, frame_id, video_id, file_name) + + del im_ann, annotations + + return (res, img_info, file_name) + + def load_anno(self, index): + return self.annotations[index][0] + + def pull_item(self, index): + id_ = self.ids[index] + + res, img_info, file_name = self.annotations[index] + # load image and preprocess + img_file = os.path.join( + self.data_dir, self.name, file_name + ) + img = cv2.imread(img_file) + assert img is not None + + return img, res.copy(), img_info, np.array([id_]) + + @Dataset.resize_getitem + def __getitem__(self, index): + """ + One image / label pair for the given index is picked up and pre-processed. + + Args: + index (int): data index + + Returns: + img (numpy.ndarray): pre-processed image + padded_labels (torch.Tensor): pre-processed label data. + The shape is :math:`[max_labels, 5]`. + each label consists of [class, xc, yc, w, h]: + class (float): class index. + xc, yc (float) : center of bbox whose values range from 0 to 1. + w, h (float) : size of bbox whose values range from 0 to 1. + info_img : tuple of h, w, nh, nw, dx, dy. + h, w (int): original shape of the image + nh, nw (int): shape of the resized image without padding + dx, dy (int): pad size + img_id (int): same as the input index. Used for evaluation. 
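`load_anno_from_ids` converts COCO-style `[x, y, w, h]` boxes to corner format and stores the class index and track id alongside them. A worked example with made-up annotations (not part of the patch):

# COCO bboxes are [x_top_left, y_top_left, width, height]; the dataset builds
# (n, 6) rows of [x1, y1, x2, y2, class, track_id].
import numpy as np

annotations = [
    {"bbox": [100.0, 50.0, 40.0, 80.0], "category_id": 1, "track_id": 3},
    {"bbox": [300.0, 200.0, 60.0, 120.0], "category_id": 1, "track_id": 7},
]
class_ids = [1]

res = np.zeros((len(annotations), 6))
for ix, obj in enumerate(annotations):
    x1, y1 = obj["bbox"][0], obj["bbox"][1]
    x2, y2 = x1 + obj["bbox"][2], y1 + obj["bbox"][3]
    res[ix, 0:4] = [x1, y1, x2, y2]
    res[ix, 4] = class_ids.index(obj["category_id"])
    res[ix, 5] = obj["track_id"]

print(res[0])   # [100.  50. 140. 130.   0.   3.]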
+ """ + img, target, img_info, img_id = self.pull_item(index) + + if self.preproc is not None: + img, target = self.preproc(img, target, self.input_dim) + return img, target, img_info, img_id diff --git a/yolox/data/samplers.py b/yolox/data/samplers.py new file mode 100644 index 0000000000000000000000000000000000000000..064b13376b3c813ad6f9e5745496dd5027b65f0f --- /dev/null +++ b/yolox/data/samplers.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +import torch +import torch.distributed as dist +from torch.utils.data.sampler import BatchSampler as torchBatchSampler +from torch.utils.data.sampler import Sampler + +import itertools +from typing import Optional + + +class YoloBatchSampler(torchBatchSampler): + """ + This batch sampler will generate mini-batches of (dim, index) tuples from another sampler. + It works just like the :class:`torch.utils.data.sampler.BatchSampler`, + but it will prepend a dimension, whilst ensuring it stays the same across one mini-batch. + """ + + def __init__(self, *args, input_dimension=None, mosaic=True, **kwargs): + super().__init__(*args, **kwargs) + self.input_dim = input_dimension + self.new_input_dim = None + self.mosaic = mosaic + + def __iter__(self): + self.__set_input_dim() + for batch in super().__iter__(): + yield [(self.input_dim, idx, self.mosaic) for idx in batch] + self.__set_input_dim() + + def __set_input_dim(self): + """ This function randomly changes the the input dimension of the dataset. """ + if self.new_input_dim is not None: + self.input_dim = (self.new_input_dim[0], self.new_input_dim[1]) + self.new_input_dim = None + + +class InfiniteSampler(Sampler): + """ + In training, we only care about the "infinite stream" of training data. + So this sampler produces an infinite stream of indices and + all workers cooperate to correctly shuffle the indices and sample different indices. + The samplers in each worker effectively produces `indices[worker_id::num_workers]` + where `indices` is an infinite stream of indices consisting of + `shuffle(range(size)) + shuffle(range(size)) + ...` (if shuffle is True) + or `range(size) + range(size) + ...` (if shuffle is False) + """ + + def __init__( + self, + size: int, + shuffle: bool = True, + seed: Optional[int] = 0, + rank=0, + world_size=1, + ): + """ + Args: + size (int): the total number of data of the underlying dataset to sample from + shuffle (bool): whether to shuffle the indices or not + seed (int): the initial seed of the shuffle. Must be the same + across all workers. If None, will use a random seed shared + among workers (require synchronization among all workers). 
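`YoloBatchSampler` above turns every index in a batch into an `(input_dim, index, mosaic)` tuple so that `Dataset.resize_getitem` can apply the per-batch input size. A toy demonstration built on the stock torch samplers (not part of the patch):

# What YoloBatchSampler effectively yields: a tuple per index instead of a bare index.
from torch.utils.data.sampler import BatchSampler, SequentialSampler

indices = SequentialSampler(range(6))
batches = BatchSampler(indices, batch_size=3, drop_last=False)

input_dim, mosaic = (640, 640), True
for batch in batches:
    print([(input_dim, idx, mosaic) for idx in batch])
# [((640, 640), 0, True), ((640, 640), 1, True), ((640, 640), 2, True)]
# [((640, 640), 3, True), ((640, 640), 4, True), ((640, 640), 5, True)]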
+ """ + self._size = size + assert size > 0 + self._shuffle = shuffle + self._seed = int(seed) + + if dist.is_available() and dist.is_initialized(): + self._rank = dist.get_rank() + self._world_size = dist.get_world_size() + else: + self._rank = rank + self._world_size = world_size + + def __iter__(self): + start = self._rank + yield from itertools.islice( + self._infinite_indices(), start, None, self._world_size + ) + + def _infinite_indices(self): + g = torch.Generator() + g.manual_seed(self._seed) + while True: + if self._shuffle: + yield from torch.randperm(self._size, generator=g) + else: + yield from torch.arange(self._size) + + def __len__(self): + return self._size // self._world_size diff --git a/yolox/deepsort_tracker/deepsort.py b/yolox/deepsort_tracker/deepsort.py new file mode 100644 index 0000000000000000000000000000000000000000..198f3b0dd7bf34bde4bf16eb9c80f068a8635ced --- /dev/null +++ b/yolox/deepsort_tracker/deepsort.py @@ -0,0 +1,295 @@ +import numpy as np +import torch +import cv2 +import os + +from .reid_model import Extractor +from yolox.deepsort_tracker import kalman_filter, linear_assignment, iou_matching +from yolox.data.dataloading import get_yolox_datadir +from .detection import Detection +from .track import Track + + +def _cosine_distance(a, b, data_is_normalized=False): + if not data_is_normalized: + a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) + b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) + return 1. - np.dot(a, b.T) + + +def _nn_cosine_distance(x, y): + distances = _cosine_distance(x, y) + return distances.min(axis=0) + + +class Tracker: + def __init__(self, metric, max_iou_distance=0.7, max_age=70, n_init=3): + self.metric = metric + self.max_iou_distance = max_iou_distance + self.max_age = max_age + self.n_init = n_init + + self.kf = kalman_filter.KalmanFilter() + self.tracks = [] + self._next_id = 1 + + def predict(self): + """Propagate track state distributions one time step forward. + This function should be called once every time step, before `update`. + """ + for track in self.tracks: + track.predict(self.kf) + + def increment_ages(self): + for track in self.tracks: + track.increment_age() + track.mark_missed() + + def update(self, detections, classes): + """Perform measurement update and track management. + Parameters + ---------- + detections : List[deep_sort.detection.Detection] + A list of detections at the current time step. + """ + # Run matching cascade. + matches, unmatched_tracks, unmatched_detections = \ + self._match(detections) + + # Update track set. + for track_idx, detection_idx in matches: + self.tracks[track_idx].update( + self.kf, detections[detection_idx]) + for track_idx in unmatched_tracks: + self.tracks[track_idx].mark_missed() + for detection_idx in unmatched_detections: + self._initiate_track(detections[detection_idx], classes[detection_idx].item()) + self.tracks = [t for t in self.tracks if not t.is_deleted()] + + # Update distance metric. 
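`InfiniteSampler.__iter__` above shards one seeded permutation across workers with `itertools.islice(stream, rank, None, world_size)`, so every rank sees a disjoint slice of the same ordering. A quick standalone check (not part of the patch):

# Rank-sharding of a shared index stream: each rank strides through the same
# permutation starting at its own offset.
import itertools
import torch

g = torch.Generator()
g.manual_seed(0)                       # identical seed on every rank
perm = torch.randperm(8, generator=g).tolist()

world_size = 2
for rank in range(world_size):
    shard = list(itertools.islice(perm, rank, None, world_size))
    print(rank, shard)                 # disjoint halves of the same permutation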
+ active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] + features, targets = [], [] + for track in self.tracks: + if not track.is_confirmed(): + continue + features += track.features + targets += [track.track_id for _ in track.features] + track.features = [] + self.metric.partial_fit( + np.asarray(features), np.asarray(targets), active_targets) + + def _match(self, detections): + + def gated_metric(tracks, dets, track_indices, detection_indices): + features = np.array([dets[i].feature for i in detection_indices]) + targets = np.array([tracks[i].track_id for i in track_indices]) + cost_matrix = self.metric.distance(features, targets) + cost_matrix = linear_assignment.gate_cost_matrix( + self.kf, cost_matrix, tracks, dets, track_indices, + detection_indices) + + return cost_matrix + + # Split track set into confirmed and unconfirmed tracks. + confirmed_tracks = [ + i for i, t in enumerate(self.tracks) if t.is_confirmed()] + unconfirmed_tracks = [ + i for i, t in enumerate(self.tracks) if not t.is_confirmed()] + + # Associate confirmed tracks using appearance features. + matches_a, unmatched_tracks_a, unmatched_detections = \ + linear_assignment.matching_cascade( + gated_metric, self.metric.matching_threshold, self.max_age, + self.tracks, detections, confirmed_tracks) + + # Associate remaining tracks together with unconfirmed tracks using IOU. + iou_track_candidates = unconfirmed_tracks + [ + k for k in unmatched_tracks_a if + self.tracks[k].time_since_update == 1] + unmatched_tracks_a = [ + k for k in unmatched_tracks_a if + self.tracks[k].time_since_update != 1] + matches_b, unmatched_tracks_b, unmatched_detections = \ + linear_assignment.min_cost_matching( + iou_matching.iou_cost, self.max_iou_distance, self.tracks, + detections, iou_track_candidates, unmatched_detections) + + matches = matches_a + matches_b + unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) + return matches, unmatched_tracks, unmatched_detections + + def _initiate_track(self, detection, class_id): + mean, covariance = self.kf.initiate(detection.to_xyah()) + self.tracks.append(Track( + mean, covariance, self._next_id, class_id, self.n_init, self.max_age, + detection.feature)) + self._next_id += 1 + + +class NearestNeighborDistanceMetric(object): + def __init__(self, metric, matching_threshold, budget=None): + + if metric == "cosine": + self._metric = _nn_cosine_distance + else: + raise ValueError( + "Invalid metric; must be either 'euclidean' or 'cosine'") + self.matching_threshold = matching_threshold + self.budget = budget + self.samples = {} + + def partial_fit(self, features, targets, active_targets): + for feature, target in zip(features, targets): + self.samples.setdefault(target, []).append(feature) + if self.budget is not None: + self.samples[target] = self.samples[target][-self.budget:] + self.samples = {k: self.samples[k] for k in active_targets} + + def distance(self, features, targets): + cost_matrix = np.zeros((len(targets), len(features))) + for i, target in enumerate(targets): + cost_matrix[i, :] = self._metric(self.samples[target], features) + return cost_matrix + + +class DeepSort(object): + def __init__(self, model_path, max_dist=0.1, min_confidence=0.3, nms_max_overlap=1.0, max_iou_distance=0.7, max_age=30, n_init=3, nn_budget=100, use_cuda=True): + self.min_confidence = min_confidence + self.nms_max_overlap = nms_max_overlap + + self.extractor = Extractor(model_path, use_cuda=use_cuda) + + max_cosine_distance = max_dist + metric = NearestNeighborDistanceMetric( + 
"cosine", max_cosine_distance, nn_budget) + self.tracker = Tracker( + metric, max_iou_distance=max_iou_distance, max_age=max_age, n_init=n_init) + + def update(self, output_results, img_info, img_size, img_file_name): + img_file_name = os.path.join(get_yolox_datadir(), 'mot', 'train', img_file_name) + ori_img = cv2.imread(img_file_name) + self.height, self.width = ori_img.shape[:2] + # post process detections + output_results = output_results.cpu().numpy() + confidences = output_results[:, 4] * output_results[:, 5] + + bboxes = output_results[:, :4] # x1y1x2y2 + img_h, img_w = img_info[0], img_info[1] + scale = min(img_size[0] / float(img_h), img_size[1] / float(img_w)) + bboxes /= scale + bbox_xyxy = bboxes + bbox_tlwh = self._xyxy_to_tlwh_array(bbox_xyxy) + remain_inds = confidences > self.min_confidence + bbox_tlwh = bbox_tlwh[remain_inds] + confidences = confidences[remain_inds] + + # generate detections + features = self._get_features(bbox_tlwh, ori_img) + detections = [Detection(bbox_tlwh[i], conf, features[i]) for i, conf in enumerate( + confidences) if conf > self.min_confidence] + classes = np.zeros((len(detections), )) + + # run on non-maximum supression + boxes = np.array([d.tlwh for d in detections]) + scores = np.array([d.confidence for d in detections]) + + # update tracker + self.tracker.predict() + self.tracker.update(detections, classes) + + # output bbox identities + outputs = [] + for track in self.tracker.tracks: + if not track.is_confirmed() or track.time_since_update > 1: + continue + box = track.to_tlwh() + x1, y1, x2, y2 = self._tlwh_to_xyxy_noclip(box) + track_id = track.track_id + class_id = track.class_id + outputs.append(np.array([x1, y1, x2, y2, track_id, class_id], dtype=np.int)) + if len(outputs) > 0: + outputs = np.stack(outputs, axis=0) + return outputs + + """ + TODO: + Convert bbox from xc_yc_w_h to xtl_ytl_w_h + Thanks JieChen91@github.com for reporting this bug! + """ + @staticmethod + def _xywh_to_tlwh(bbox_xywh): + if isinstance(bbox_xywh, np.ndarray): + bbox_tlwh = bbox_xywh.copy() + elif isinstance(bbox_xywh, torch.Tensor): + bbox_tlwh = bbox_xywh.clone() + bbox_tlwh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2. + bbox_tlwh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2. + return bbox_tlwh + + @staticmethod + def _xyxy_to_tlwh_array(bbox_xyxy): + if isinstance(bbox_xyxy, np.ndarray): + bbox_tlwh = bbox_xyxy.copy() + elif isinstance(bbox_xyxy, torch.Tensor): + bbox_tlwh = bbox_xyxy.clone() + bbox_tlwh[:, 2] = bbox_xyxy[:, 2] - bbox_xyxy[:, 0] + bbox_tlwh[:, 3] = bbox_xyxy[:, 3] - bbox_xyxy[:, 1] + return bbox_tlwh + + def _xywh_to_xyxy(self, bbox_xywh): + x, y, w, h = bbox_xywh + x1 = max(int(x - w / 2), 0) + x2 = min(int(x + w / 2), self.width - 1) + y1 = max(int(y - h / 2), 0) + y2 = min(int(y + h / 2), self.height - 1) + return x1, y1, x2, y2 + + def _tlwh_to_xyxy(self, bbox_tlwh): + """ + TODO: + Convert bbox from xtl_ytl_w_h to xc_yc_w_h + Thanks JieChen91@github.com for reporting this bug! + """ + x, y, w, h = bbox_tlwh + x1 = max(int(x), 0) + x2 = min(int(x+w), self.width - 1) + y1 = max(int(y), 0) + y2 = min(int(y+h), self.height - 1) + return x1, y1, x2, y2 + + def _tlwh_to_xyxy_noclip(self, bbox_tlwh): + """ + TODO: + Convert bbox from xtl_ytl_w_h to xc_yc_w_h + Thanks JieChen91@github.com for reporting this bug! 
+ """ + x, y, w, h = bbox_tlwh + x1 = x + x2 = x + w + y1 = y + y2 = y + h + return x1, y1, x2, y2 + + def increment_ages(self): + self.tracker.increment_ages() + + def _xyxy_to_tlwh(self, bbox_xyxy): + x1, y1, x2, y2 = bbox_xyxy + + t = x1 + l = y1 + w = int(x2 - x1) + h = int(y2 - y1) + return t, l, w, h + + def _get_features(self, bbox_xywh, ori_img): + im_crops = [] + for box in bbox_xywh: + x1, y1, x2, y2 = self._tlwh_to_xyxy(box) + im = ori_img[y1:y2, x1:x2] + im_crops.append(im) + if im_crops: + features = self.extractor(im_crops) + else: + features = np.array([]) + return features diff --git a/yolox/deepsort_tracker/detection.py b/yolox/deepsort_tracker/detection.py new file mode 100644 index 0000000000000000000000000000000000000000..9bd84977d840bfd7a63daf8efafdcb8f7615c6a3 --- /dev/null +++ b/yolox/deepsort_tracker/detection.py @@ -0,0 +1,46 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np + + +class Detection(object): + """ + This class represents a bounding box detection in a single image. + Parameters + ---------- + tlwh : array_like + Bounding box in format `(x, y, w, h)`. + confidence : float + Detector confidence score. + feature : array_like + A feature vector that describes the object contained in this image. + Attributes + ---------- + tlwh : ndarray + Bounding box in format `(top left x, top left y, width, height)`. + confidence : ndarray + Detector confidence score. + feature : ndarray | NoneType + A feature vector that describes the object contained in this image. + """ + + def __init__(self, tlwh, confidence, feature): + self.tlwh = np.asarray(tlwh, dtype=np.float) + self.confidence = float(confidence) + self.feature = np.asarray(feature, dtype=np.float32) + + def to_tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + def to_xyah(self): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. + """ + ret = self.tlwh.copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret \ No newline at end of file diff --git a/yolox/deepsort_tracker/iou_matching.py b/yolox/deepsort_tracker/iou_matching.py new file mode 100644 index 0000000000000000000000000000000000000000..7a27a4dbff2360a09943442140d52743cd66d8c4 --- /dev/null +++ b/yolox/deepsort_tracker/iou_matching.py @@ -0,0 +1,76 @@ +# vim: expandtab:ts=4:sw=4 +from __future__ import absolute_import +import numpy as np +from yolox.deepsort_tracker import linear_assignment + + +def iou(bbox, candidates): + """Computer intersection over union. + Parameters + ---------- + bbox : ndarray + A bounding box in format `(top left x, top left y, width, height)`. + candidates : ndarray + A matrix of candidate bounding boxes (one per row) in the same format + as `bbox`. + Returns + ------- + ndarray + The intersection over union in [0, 1] between the `bbox` and each + candidate. A higher score means a larger fraction of the `bbox` is + occluded by the candidate. 
+ """ + bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] + candidates_tl = candidates[:, :2] + candidates_br = candidates[:, :2] + candidates[:, 2:] + + tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], + np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] + br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], + np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] + wh = np.maximum(0., br - tl) + + area_intersection = wh.prod(axis=1) + area_bbox = bbox[2:].prod() + area_candidates = candidates[:, 2:].prod(axis=1) + return area_intersection / (area_bbox + area_candidates - area_intersection) + + +def iou_cost(tracks, detections, track_indices=None, + detection_indices=None): + """An intersection over union distance metric. + Parameters + ---------- + tracks : List[deep_sort.track.Track] + A list of tracks. + detections : List[deep_sort.detection.Detection] + A list of detections. + track_indices : Optional[List[int]] + A list of indices to tracks that should be matched. Defaults to + all `tracks`. + detection_indices : Optional[List[int]] + A list of indices to detections that should be matched. Defaults + to all `detections`. + Returns + ------- + ndarray + Returns a cost matrix of shape + len(track_indices), len(detection_indices) where entry (i, j) is + `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. + """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + cost_matrix = np.zeros((len(track_indices), len(detection_indices))) + for row, track_idx in enumerate(track_indices): + if tracks[track_idx].time_since_update > 1: + cost_matrix[row, :] = linear_assignment.INFTY_COST + continue + + bbox = tracks[track_idx].to_tlwh() + candidates = np.asarray( + [detections[i].tlwh for i in detection_indices]) + cost_matrix[row, :] = 1. - iou(bbox, candidates) + return cost_matrix \ No newline at end of file diff --git a/yolox/deepsort_tracker/kalman_filter.py b/yolox/deepsort_tracker/kalman_filter.py new file mode 100644 index 0000000000000000000000000000000000000000..1d4bcc36b9ab44eb08fe41161839b8d7e604b6fe --- /dev/null +++ b/yolox/deepsort_tracker/kalman_filter.py @@ -0,0 +1,208 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np +import scipy.linalg + + +""" +Table for the 0.95 quantile of the chi-square distribution with N degrees of +freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv +function and used as Mahalanobis gating threshold. +""" +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919} + + +class KalmanFilter(object): + """ + A simple Kalman filter for tracking bounding boxes in image space. + The 8-dimensional state space + x, y, a, h, vx, vy, va, vh + contains the bounding box center position (x, y), aspect ratio a, height h, + and their respective velocities. + Object motion follows a constant velocity model. The bounding box location + (x, y, a, h) is taken as direct observation of the state space (linear + observation model). + """ + + def __init__(self): + ndim, dt = 4, 1. + + # Create Kalman filter model matrices. + self._motion_mat = np.eye(2 * ndim, 2 * ndim) + for i in range(ndim): + self._motion_mat[i, ndim + i] = dt + self._update_mat = np.eye(ndim, 2 * ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. 
These weights control the amount of uncertainty in + # the model. This is a bit hacky. + self._std_weight_position = 1. / 20 + self._std_weight_velocity = 1. / 160 + + def initiate(self, measurement): + """Create track from unassociated measurement. + Parameters + ---------- + measurement : ndarray + Bounding box coordinates (x, y, a, h) with center position (x, y), + aspect ratio a, and height h. + Returns + ------- + (ndarray, ndarray) + Returns the mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are initialized + to 0 mean. + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + mean = np.r_[mean_pos, mean_vel] + + std = [ + 2 * self._std_weight_position * measurement[3], + 2 * self._std_weight_position * measurement[3], + 1e-2, + 2 * self._std_weight_position * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 1e-5, + 10 * self._std_weight_velocity * measurement[3]] + covariance = np.diag(np.square(std)) + return mean, covariance + + def predict(self, mean, covariance): + """Run Kalman filter prediction step. + Parameters + ---------- + mean : ndarray + The 8 dimensional mean vector of the object state at the previous + time step. + covariance : ndarray + The 8x8 dimensional covariance matrix of the object state at the + previous time step. + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + """ + std_pos = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-2, + self._std_weight_position * mean[3]] + std_vel = [ + self._std_weight_velocity * mean[3], + self._std_weight_velocity * mean[3], + 1e-5, + self._std_weight_velocity * mean[3]] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + mean = np.dot(self._motion_mat, mean) + covariance = np.linalg.multi_dot(( + self._motion_mat, covariance, self._motion_mat.T)) + motion_cov + + return mean, covariance + + def project(self, mean, covariance): + """Project state distribution to measurement space. + Parameters + ---------- + mean : ndarray + The state's mean vector (8 dimensional array). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + Returns + ------- + (ndarray, ndarray) + Returns the projected mean and covariance matrix of the given state + estimate. + """ + std = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-1, + self._std_weight_position * mean[3]] + innovation_cov = np.diag(np.square(std)) + + mean = np.dot(self._update_mat, mean) + covariance = np.linalg.multi_dot(( + self._update_mat, covariance, self._update_mat.T)) + return mean, covariance + innovation_cov + + def update(self, mean, covariance, measurement): + """Run Kalman filter correction step. + Parameters + ---------- + mean : ndarray + The predicted state's mean vector (8 dimensional). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + measurement : ndarray + The 4 dimensional measurement vector (x, y, a, h), where (x, y) + is the center position, a the aspect ratio, and h the height of the + bounding box. + Returns + ------- + (ndarray, ndarray) + Returns the measurement-corrected state distribution. 
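+        Example
+        -------
+        A minimal predict/update cycle; the `(x, y, a, h)` measurements below
+        are arbitrary illustrative values::
+
+            kf = KalmanFilter()
+            mean, covariance = kf.initiate(np.array([320., 240., 0.5, 80.]))
+            mean, covariance = kf.predict(mean, covariance)
+            mean, covariance = kf.update(mean, covariance,
+                                         np.array([322., 241., 0.5, 81.]))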
+ """ + projected_mean, projected_cov = self.project(mean, covariance) + + chol_factor, lower = scipy.linalg.cho_factor( + projected_cov, lower=True, check_finite=False) + kalman_gain = scipy.linalg.cho_solve( + (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, + check_finite=False).T + innovation = measurement - projected_mean + + new_mean = mean + np.dot(innovation, kalman_gain.T) + new_covariance = covariance - np.linalg.multi_dot(( + kalman_gain, projected_cov, kalman_gain.T)) + return new_mean, new_covariance + + def gating_distance(self, mean, covariance, measurements, + only_position=False): + """Compute gating distance between state distribution and measurements. + A suitable distance threshold can be obtained from `chi2inv95`. If + `only_position` is False, the chi-square distribution has 4 degrees of + freedom, otherwise 2. + Parameters + ---------- + mean : ndarray + Mean vector over the state distribution (8 dimensional). + covariance : ndarray + Covariance of the state distribution (8x8 dimensional). + measurements : ndarray + An Nx4 dimensional matrix of N measurements, each in + format (x, y, a, h) where (x, y) is the bounding box center + position, a the aspect ratio, and h the height. + only_position : Optional[bool] + If True, distance computation is done with respect to the bounding + box center position only. + Returns + ------- + ndarray + Returns an array of length N, where the i-th element contains the + squared Mahalanobis distance between (mean, covariance) and + `measurements[i]`. + """ + mean, covariance = self.project(mean, covariance) + if only_position: + mean, covariance = mean[:2], covariance[:2, :2] + measurements = measurements[:, :2] + + cholesky_factor = np.linalg.cholesky(covariance) + d = measurements - mean + z = scipy.linalg.solve_triangular( + cholesky_factor, d.T, lower=True, check_finite=False, + overwrite_b=True) + squared_maha = np.sum(z * z, axis=0) + return squared_maha \ No newline at end of file diff --git a/yolox/deepsort_tracker/linear_assignment.py b/yolox/deepsort_tracker/linear_assignment.py new file mode 100644 index 0000000000000000000000000000000000000000..5651893225d410b0a2144f9624810e4a98fac75c --- /dev/null +++ b/yolox/deepsort_tracker/linear_assignment.py @@ -0,0 +1,182 @@ +from __future__ import absolute_import +import numpy as np +# from sklearn.utils.linear_assignment_ import linear_assignment +from scipy.optimize import linear_sum_assignment as linear_assignment +from yolox.deepsort_tracker import kalman_filter + + +INFTY_COST = 1e+5 + + +def min_cost_matching( + distance_metric, max_distance, tracks, detections, track_indices=None, + detection_indices=None): + """Solve linear assignment problem. + Parameters + ---------- + distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray + The distance metric is given a list of tracks and detections as well as + a list of N track indices and M detection indices. The metric should + return the NxM dimensional cost matrix, where element (i, j) is the + association cost between the i-th track in the given track indices and + the j-th detection in the given detection_indices. + max_distance : float + Gating threshold. Associations with cost larger than this value are + disregarded. + tracks : List[track.Track] + A list of predicted tracks at the current time step. + detections : List[detection.Detection] + A list of detections at the current time step. 
+ track_indices : List[int] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). + detection_indices : List[int] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). + Returns + ------- + (List[(int, int)], List[int], List[int]) + Returns a tuple with the following three entries: + * A list of matched track and detection indices. + * A list of unmatched track indices. + * A list of unmatched detection indices. + """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + if len(detection_indices) == 0 or len(track_indices) == 0: + return [], track_indices, detection_indices # Nothing to match. + + cost_matrix = distance_metric( + tracks, detections, track_indices, detection_indices) + cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 + + row_indices, col_indices = linear_assignment(cost_matrix) + + matches, unmatched_tracks, unmatched_detections = [], [], [] + for col, detection_idx in enumerate(detection_indices): + if col not in col_indices: + unmatched_detections.append(detection_idx) + for row, track_idx in enumerate(track_indices): + if row not in row_indices: + unmatched_tracks.append(track_idx) + for row, col in zip(row_indices, col_indices): + track_idx = track_indices[row] + detection_idx = detection_indices[col] + if cost_matrix[row, col] > max_distance: + unmatched_tracks.append(track_idx) + unmatched_detections.append(detection_idx) + else: + matches.append((track_idx, detection_idx)) + return matches, unmatched_tracks, unmatched_detections + + +def matching_cascade( + distance_metric, max_distance, cascade_depth, tracks, detections, + track_indices=None, detection_indices=None): + """Run matching cascade. + Parameters + ---------- + distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray + The distance metric is given a list of tracks and detections as well as + a list of N track indices and M detection indices. The metric should + return the NxM dimensional cost matrix, where element (i, j) is the + association cost between the i-th track in the given track indices and + the j-th detection in the given detection indices. + max_distance : float + Gating threshold. Associations with cost larger than this value are + disregarded. + cascade_depth: int + The cascade depth, should be se to the maximum track age. + tracks : List[track.Track] + A list of predicted tracks at the current time step. + detections : List[detection.Detection] + A list of detections at the current time step. + track_indices : Optional[List[int]] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). Defaults to all tracks. + detection_indices : Optional[List[int]] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). Defaults to all + detections. + Returns + ------- + (List[(int, int)], List[int], List[int]) + Returns a tuple with the following three entries: + * A list of matched track and detection indices. + * A list of unmatched track indices. + * A list of unmatched detection indices. 
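+    Notes
+    -----
+    Tracks are matched in order of increasing `time_since_update`: level 0 of
+    the cascade only considers tracks that were updated in the previous frame,
+    level 1 those missed once, and so on, so recently seen tracks take
+    priority when competing for the same detection.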
+ """ + if track_indices is None: + track_indices = list(range(len(tracks))) + if detection_indices is None: + detection_indices = list(range(len(detections))) + + unmatched_detections = detection_indices + matches = [] + for level in range(cascade_depth): + if len(unmatched_detections) == 0: # No detections left + break + + track_indices_l = [ + k for k in track_indices + if tracks[k].time_since_update == 1 + level + ] + if len(track_indices_l) == 0: # Nothing to match at this level + continue + + matches_l, _, unmatched_detections = \ + min_cost_matching( + distance_metric, max_distance, tracks, detections, + track_indices_l, unmatched_detections) + matches += matches_l + unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) + return matches, unmatched_tracks, unmatched_detections + + +def gate_cost_matrix( + kf, cost_matrix, tracks, detections, track_indices, detection_indices, + gated_cost=INFTY_COST, only_position=False): + """Invalidate infeasible entries in cost matrix based on the state + distributions obtained by Kalman filtering. + Parameters + ---------- + kf : The Kalman filter. + cost_matrix : ndarray + The NxM dimensional cost matrix, where N is the number of track indices + and M is the number of detection indices, such that entry (i, j) is the + association cost between `tracks[track_indices[i]]` and + `detections[detection_indices[j]]`. + tracks : List[track.Track] + A list of predicted tracks at the current time step. + detections : List[detection.Detection] + A list of detections at the current time step. + track_indices : List[int] + List of track indices that maps rows in `cost_matrix` to tracks in + `tracks` (see description above). + detection_indices : List[int] + List of detection indices that maps columns in `cost_matrix` to + detections in `detections` (see description above). + gated_cost : Optional[float] + Entries in the cost matrix corresponding to infeasible associations are + set this value. Defaults to a very large value. + only_position : Optional[bool] + If True, only the x, y position of the state distribution is considered + during gating. Defaults to False. + Returns + ------- + ndarray + Returns the modified cost matrix. 
+ """ + gating_dim = 2 if only_position else 4 + gating_threshold = kalman_filter.chi2inv95[gating_dim] + measurements = np.asarray( + [detections[i].to_xyah() for i in detection_indices]) + for row, track_idx in enumerate(track_indices): + track = tracks[track_idx] + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position) + cost_matrix[row, gating_distance > gating_threshold] = gated_cost + return cost_matrix \ No newline at end of file diff --git a/yolox/deepsort_tracker/reid_model.py b/yolox/deepsort_tracker/reid_model.py new file mode 100644 index 0000000000000000000000000000000000000000..6aff8853f0859b16b33b178a3ada445f755a7027 --- /dev/null +++ b/yolox/deepsort_tracker/reid_model.py @@ -0,0 +1,145 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +import cv2 +import logging +import torchvision.transforms as transforms + + +class BasicBlock(nn.Module): + def __init__(self, c_in, c_out, is_downsample=False): + super(BasicBlock, self).__init__() + self.is_downsample = is_downsample + if is_downsample: + self.conv1 = nn.Conv2d( + c_in, c_out, 3, stride=2, padding=1, bias=False) + else: + self.conv1 = nn.Conv2d( + c_in, c_out, 3, stride=1, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(c_out) + self.relu = nn.ReLU(True) + self.conv2 = nn.Conv2d(c_out, c_out, 3, stride=1, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(c_out) + if is_downsample: + self.downsample = nn.Sequential( + nn.Conv2d(c_in, c_out, 1, stride=2, bias=False), + nn.BatchNorm2d(c_out) + ) + elif c_in != c_out: + self.downsample = nn.Sequential( + nn.Conv2d(c_in, c_out, 1, stride=1, bias=False), + nn.BatchNorm2d(c_out) + ) + self.is_downsample = True + + def forward(self, x): + y = self.conv1(x) + y = self.bn1(y) + y = self.relu(y) + y = self.conv2(y) + y = self.bn2(y) + if self.is_downsample: + x = self.downsample(x) + return F.relu(x.add(y), True) + + +def make_layers(c_in, c_out, repeat_times, is_downsample=False): + blocks = [] + for i in range(repeat_times): + if i == 0: + blocks += [BasicBlock(c_in, c_out, is_downsample=is_downsample), ] + else: + blocks += [BasicBlock(c_out, c_out), ] + return nn.Sequential(*blocks) + + +class Net(nn.Module): + def __init__(self, num_classes=751, reid=False): + super(Net, self).__init__() + # 3 128 64 + self.conv = nn.Sequential( + nn.Conv2d(3, 64, 3, stride=1, padding=1), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True), + # nn.Conv2d(32,32,3,stride=1,padding=1), + # nn.BatchNorm2d(32), + # nn.ReLU(inplace=True), + nn.MaxPool2d(3, 2, padding=1), + ) + # 32 64 32 + self.layer1 = make_layers(64, 64, 2, False) + # 32 64 32 + self.layer2 = make_layers(64, 128, 2, True) + # 64 32 16 + self.layer3 = make_layers(128, 256, 2, True) + # 128 16 8 + self.layer4 = make_layers(256, 512, 2, True) + # 256 8 4 + self.avgpool = nn.AvgPool2d((8, 4), 1) + # 256 1 1 + self.reid = reid + self.classifier = nn.Sequential( + nn.Linear(512, 256), + nn.BatchNorm1d(256), + nn.ReLU(inplace=True), + nn.Dropout(), + nn.Linear(256, num_classes), + ) + + def forward(self, x): + x = self.conv(x) + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + x = self.avgpool(x) + x = x.view(x.size(0), -1) + # B x 128 + if self.reid: + x = x.div(x.norm(p=2, dim=1, keepdim=True)) + return x + # classifier + x = self.classifier(x) + return x + + +class Extractor(object): + def __init__(self, model_path, use_cuda=True): + self.net = Net(reid=True) + self.device = "cuda" if torch.cuda.is_available() and 
use_cuda else "cpu" + state_dict = torch.load(model_path, map_location=torch.device(self.device))[ + 'net_dict'] + self.net.load_state_dict(state_dict) + logger = logging.getLogger("root.tracker") + logger.info("Loading weights from {}... Done!".format(model_path)) + self.net.to(self.device) + self.size = (64, 128) + self.norm = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), + ]) + + def _preprocess(self, im_crops): + """ + TODO: + 1. to float with scale from 0 to 1 + 2. resize to (64, 128) as Market1501 dataset did + 3. concatenate to a numpy array + 3. to torch Tensor + 4. normalize + """ + def _resize(im, size): + return cv2.resize(im.astype(np.float32)/255., size) + + im_batch = torch.cat([self.norm(_resize(im, self.size)).unsqueeze( + 0) for im in im_crops], dim=0).float() + return im_batch + + def __call__(self, im_crops): + im_batch = self._preprocess(im_crops) + with torch.no_grad(): + im_batch = im_batch.to(self.device) + features = self.net(im_batch) + return features.cpu().numpy() \ No newline at end of file diff --git a/yolox/deepsort_tracker/track.py b/yolox/deepsort_tracker/track.py new file mode 100644 index 0000000000000000000000000000000000000000..6867441e016e80224fda6ecf3e0c7e8072be4e57 --- /dev/null +++ b/yolox/deepsort_tracker/track.py @@ -0,0 +1,158 @@ +# vim: expandtab:ts=4:sw=4 + + +class TrackState: + """ + Enumeration type for the single target track state. Newly created tracks are + classified as `tentative` until enough evidence has been collected. Then, + the track state is changed to `confirmed`. Tracks that are no longer alive + are classified as `deleted` to mark them for removal from the set of active + tracks. + """ + + Tentative = 1 + Confirmed = 2 + Deleted = 3 + + +class Track: + """ + A single target track with state space `(x, y, a, h)` and associated + velocities, where `(x, y)` is the center of the bounding box, `a` is the + aspect ratio and `h` is the height. + Parameters + ---------- + mean : ndarray + Mean vector of the initial state distribution. + covariance : ndarray + Covariance matrix of the initial state distribution. + track_id : int + A unique track identifier. + n_init : int + Number of consecutive detections before the track is confirmed. The + track state is set to `Deleted` if a miss occurs within the first + `n_init` frames. + max_age : int + The maximum number of consecutive misses before the track state is + set to `Deleted`. + feature : Optional[ndarray] + Feature vector of the detection this track originates from. If not None, + this feature is added to the `features` cache. + Attributes + ---------- + mean : ndarray + Mean vector of the initial state distribution. + covariance : ndarray + Covariance matrix of the initial state distribution. + track_id : int + A unique track identifier. + hits : int + Total number of measurement updates. + age : int + Total number of frames since first occurance. + time_since_update : int + Total number of frames since last measurement update. + state : TrackState + The current track state. + features : List[ndarray] + A cache of features. On each measurement update, the associated feature + vector is added to this list. 
+ """ + + def __init__(self, mean, covariance, track_id, class_id, n_init, max_age, + feature=None): + self.mean = mean + self.covariance = covariance + self.track_id = track_id + self.class_id = class_id + self.hits = 1 + self.age = 1 + self.time_since_update = 0 + + self.state = TrackState.Tentative + self.features = [] + if feature is not None: + self.features.append(feature) + + self._n_init = n_init + self._max_age = max_age + + def to_tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + Returns + ------- + ndarray + The bounding box. + """ + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + def to_tlbr(self): + """Get current position in bounding box format `(min x, miny, max x, + max y)`. + Returns + ------- + ndarray + The bounding box. + """ + ret = self.to_tlwh() + ret[2:] = ret[:2] + ret[2:] + return ret + + def increment_age(self): + self.age += 1 + self.time_since_update += 1 + + def predict(self, kf): + """Propagate the state distribution to the current time step using a + Kalman filter prediction step. + Parameters + ---------- + kf : kalman_filter.KalmanFilter + The Kalman filter. + """ + self.mean, self.covariance = kf.predict(self.mean, self.covariance) + self.increment_age() + + def update(self, kf, detection): + """Perform Kalman filter measurement update step and update the feature + cache. + Parameters + ---------- + kf : kalman_filter.KalmanFilter + The Kalman filter. + detection : Detection + The associated detection. + """ + self.mean, self.covariance = kf.update( + self.mean, self.covariance, detection.to_xyah()) + self.features.append(detection.feature) + + self.hits += 1 + self.time_since_update = 0 + if self.state == TrackState.Tentative and self.hits >= self._n_init: + self.state = TrackState.Confirmed + + def mark_missed(self): + """Mark this track as missed (no association at the current time step). + """ + if self.state == TrackState.Tentative: + self.state = TrackState.Deleted + elif self.time_since_update > self._max_age: + self.state = TrackState.Deleted + + def is_tentative(self): + """Returns True if this track is tentative (unconfirmed). + """ + return self.state == TrackState.Tentative + + def is_confirmed(self): + """Returns True if this track is confirmed.""" + return self.state == TrackState.Confirmed + + def is_deleted(self): + """Returns True if this track is dead and should be deleted.""" + return self.state == TrackState.Deleted \ No newline at end of file diff --git a/yolox/evaluators/__init__.py b/yolox/evaluators/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5d704e05c79409fb053be1a8f8ce4676a015b054 --- /dev/null +++ b/yolox/evaluators/__init__.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. + +from .coco_evaluator import COCOEvaluator +from .mot_evaluator import MOTEvaluator diff --git a/yolox/evaluators/coco_evaluator.py b/yolox/evaluators/coco_evaluator.py new file mode 100644 index 0000000000000000000000000000000000000000..24dce235307cfe52062da31b0e06506b77b32b36 --- /dev/null +++ b/yolox/evaluators/coco_evaluator.py @@ -0,0 +1,224 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) Megvii, Inc. and its affiliates. 
+ +from loguru import logger +from tqdm import tqdm + +import torch + +from yolox.utils import ( + gather, + is_main_process, + postprocess, + synchronize, + time_synchronized, + xyxy2xywh +) + +import contextlib +import io +import itertools +import json +import tempfile +import time + + +class COCOEvaluator: + """ + COCO AP Evaluation class. All the data in the val2017 dataset are processed + and evaluated by COCO API. + """ + + def __init__( + self, dataloader, img_size, confthre, nmsthre, num_classes, testdev=False + ): + """ + Args: + dataloader (Dataloader): evaluate dataloader. + img_size (int): image size after preprocess. images are resized + to squares whose shape is (img_size, img_size). + confthre (float): confidence threshold ranging from 0 to 1, which + is defined in the config file. + nmsthre (float): IoU threshold of non-max supression ranging from 0 to 1. + """ + self.dataloader = dataloader + self.img_size = img_size + self.confthre = confthre + self.nmsthre = nmsthre + self.num_classes = num_classes + self.testdev = testdev + + def evaluate( + self, + model, + distributed=False, + half=False, + trt_file=None, + decoder=None, + test_size=None, + ): + """ + COCO average precision (AP) Evaluation. Iterate inference on the test dataset + and the results are evaluated by COCO API. + + NOTE: This function will change training mode to False, please save states if needed. + + Args: + model : model to evaluate. + + Returns: + ap50_95 (float) : COCO AP of IoU=50:95 + ap50 (float) : COCO AP of IoU=50 + summary (sr): summary info of evaluation. + """ + # TODO half to amp_test + tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor + model = model.eval() + if half: + model = model.half() + ids = [] + data_list = [] + progress_bar = tqdm if is_main_process() else iter + + inference_time = 0 + nms_time = 0 + n_samples = len(self.dataloader) - 1 + + if trt_file is not None: + from torch2trt import TRTModule + + model_trt = TRTModule() + model_trt.load_state_dict(torch.load(trt_file)) + + x = torch.ones(1, 3, test_size[0], test_size[1]).cuda() + model(x) + model = model_trt + + for cur_iter, (imgs, _, info_imgs, ids) in enumerate( + progress_bar(self.dataloader) + ): + with torch.no_grad(): + imgs = imgs.type(tensor_type) + + # skip the the last iters since batchsize might be not enough for batch inference + is_time_record = cur_iter < len(self.dataloader) - 1 + if is_time_record: + start = time.time() + + outputs = model(imgs) + if decoder is not None: + outputs = decoder(outputs, dtype=outputs.type()) + + if is_time_record: + infer_end = time_synchronized() + inference_time += infer_end - start + + outputs = postprocess( + outputs, self.num_classes, self.confthre, self.nmsthre + ) + if is_time_record: + nms_end = time_synchronized() + nms_time += nms_end - infer_end + + data_list.extend(self.convert_to_coco_format(outputs, info_imgs, ids)) + + statistics = torch.cuda.FloatTensor([inference_time, nms_time, n_samples]) + if distributed: + data_list = gather(data_list, dst=0) + data_list = list(itertools.chain(*data_list)) + torch.distributed.reduce(statistics, dst=0) + + eval_results = self.evaluate_prediction(data_list, statistics) + synchronize() + return eval_results + + def convert_to_coco_format(self, outputs, info_imgs, ids): + data_list = [] + for (output, img_h, img_w, img_id) in zip( + outputs, info_imgs[0], info_imgs[1], ids + ): + if output is None: + continue + output = output.cpu() + + bboxes = output[:, 0:4] + + # preprocessing: resize + scale = min( + 
self.img_size[0] / float(img_h), self.img_size[1] / float(img_w) + ) + bboxes /= scale + bboxes = xyxy2xywh(bboxes) + + cls = output[:, 6] + scores = output[:, 4] * output[:, 5] + for ind in range(bboxes.shape[0]): + label = self.dataloader.dataset.class_ids[int(cls[ind])] + pred_data = { + "image_id": int(img_id), + "category_id": label, + "bbox": bboxes[ind].numpy().tolist(), + "score": scores[ind].numpy().item(), + "segmentation": [], + } # COCO json format + data_list.append(pred_data) + return data_list + + def evaluate_prediction(self, data_dict, statistics): + if not is_main_process(): + return 0, 0, None + + logger.info("Evaluate in main process...") + + annType = ["segm", "bbox", "keypoints"] + + inference_time = statistics[0].item() + nms_time = statistics[1].item() + n_samples = statistics[2].item() + + a_infer_time = 1000 * inference_time / (n_samples * self.dataloader.batch_size) + a_nms_time = 1000 * nms_time / (n_samples * self.dataloader.batch_size) + + time_info = ", ".join( + [ + "Average {} time: {:.2f} ms".format(k, v) + for k, v in zip( + ["forward", "NMS", "inference"], + [a_infer_time, a_nms_time, (a_infer_time + a_nms_time)], + ) + ] + ) + + info = time_info + "\n" + + # Evaluate the Dt (detection) json comparing with the ground truth + if len(data_dict) > 0: + cocoGt = self.dataloader.dataset.coco + # TODO: since pycocotools can't process dict in py36, write data to json file. + if self.testdev: + json.dump(data_dict, open("./yolox_testdev_2017.json", "w")) + cocoDt = cocoGt.loadRes("./yolox_testdev_2017.json") + else: + _, tmp = tempfile.mkstemp() + json.dump(data_dict, open(tmp, "w")) + cocoDt = cocoGt.loadRes(tmp) + ''' + try: + from yolox.layers import COCOeval_opt as COCOeval + except ImportError: + from pycocotools import cocoeval as COCOeval + logger.warning("Use standard COCOeval.") + ''' + #from pycocotools.cocoeval import COCOeval + from yolox.layers import COCOeval_opt as COCOeval + cocoEval = COCOeval(cocoGt, cocoDt, annType[1]) + cocoEval.evaluate() + cocoEval.accumulate() + redirect_string = io.StringIO() + with contextlib.redirect_stdout(redirect_string): + cocoEval.summarize() + info += redirect_string.getvalue() + return cocoEval.stats[0], cocoEval.stats[1], info + else: + return 0, 0, info diff --git a/yolox/evaluators/evaluation.py b/yolox/evaluators/evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..fd72f82adf4bacc73b564a855ce10082d89f76af --- /dev/null +++ b/yolox/evaluators/evaluation.py @@ -0,0 +1,200 @@ +import os +import numpy as np +import copy +import motmetrics as mm +mm.lap.default_solver = 'lap' + + +class Evaluator(object): + + def __init__(self, data_root, seq_name, data_type): + self.data_root = data_root + self.seq_name = seq_name + self.data_type = data_type + + self.load_annotations() + self.reset_accumulator() + + def load_annotations(self): + assert self.data_type == 'mot' + + gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') + self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) + self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) + + def reset_accumulator(self): + self.acc = mm.MOTAccumulator(auto_id=True) + + def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): + # results + trk_tlwhs = np.copy(trk_tlwhs) + trk_ids = np.copy(trk_ids) + + # gts + gt_objs = self.gt_frame_dict.get(frame_id, []) + gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] + + # ignore boxes + ignore_objs = 
self.gt_ignore_frame_dict.get(frame_id, []) + ignore_tlwhs = unzip_objs(ignore_objs)[0] + + # remove ignored results + keep = np.ones(len(trk_tlwhs), dtype=bool) + iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) + if len(iou_distance) > 0: + match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) + match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) + match_ious = iou_distance[match_is, match_js] + + match_js = np.asarray(match_js, dtype=int) + match_js = match_js[np.logical_not(np.isnan(match_ious))] + keep[match_js] = False + trk_tlwhs = trk_tlwhs[keep] + trk_ids = trk_ids[keep] + #match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) + #match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) + #match_ious = iou_distance[match_is, match_js] + + #match_js = np.asarray(match_js, dtype=int) + #match_js = match_js[np.logical_not(np.isnan(match_ious))] + #keep[match_js] = False + #trk_tlwhs = trk_tlwhs[keep] + #trk_ids = trk_ids[keep] + + # get distance matrix + iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) + + # acc + self.acc.update(gt_ids, trk_ids, iou_distance) + + if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): + events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics + else: + events = None + return events + + def eval_file(self, filename): + self.reset_accumulator() + + result_frame_dict = read_results(filename, self.data_type, is_gt=False) + #frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) + frames = sorted(list(set(result_frame_dict.keys()))) + for frame_id in frames: + trk_objs = result_frame_dict.get(frame_id, []) + trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] + self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) + + return self.acc + + @staticmethod + def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): + names = copy.deepcopy(names) + if metrics is None: + metrics = mm.metrics.motchallenge_metrics + metrics = copy.deepcopy(metrics) + + mh = mm.metrics.create() + summary = mh.compute_many( + accs, + metrics=metrics, + names=names, + generate_overall=True + ) + + return summary + + @staticmethod + def save_summary(summary, filename): + import pandas as pd + writer = pd.ExcelWriter(filename) + summary.to_excel(writer) + writer.save() + + + + + +def read_results(filename, data_type: str, is_gt=False, is_ignore=False): + if data_type in ('mot', 'lab'): + read_fun = read_mot_results + else: + raise ValueError('Unknown data type: {}'.format(data_type)) + + return read_fun(filename, is_gt, is_ignore) + + +""" +labels={'ped', ... % 1 +'person_on_vhcl', ... % 2 +'car', ... % 3 +'bicycle', ... % 4 +'mbike', ... % 5 +'non_mot_vhcl', ... % 6 +'static_person', ... % 7 +'distractor', ... % 8 +'occluder', ... % 9 +'occluder_on_grnd', ... %10 +'occluder_full', ... % 11 +'reflection', ... % 12 +'crowd' ... 
% 13 +}; +""" + + +def read_mot_results(filename, is_gt, is_ignore): + valid_labels = {1} + ignore_labels = {2, 7, 8, 12} + results_dict = dict() + if os.path.isfile(filename): + with open(filename, 'r') as f: + for line in f.readlines(): + linelist = line.split(',') + if len(linelist) < 7: + continue + fid = int(linelist[0]) + if fid < 1: + continue + results_dict.setdefault(fid, list()) + + box_size = float(linelist[4]) * float(linelist[5]) + + if is_gt: + if 'MOT16-' in filename or 'MOT17-' in filename: + label = int(float(linelist[7])) + mark = int(float(linelist[6])) + if mark == 0 or label not in valid_labels: + continue + score = 1 + elif is_ignore: + if 'MOT16-' in filename or 'MOT17-' in filename: + label = int(float(linelist[7])) + vis_ratio = float(linelist[8]) + if label not in ignore_labels and vis_ratio >= 0: + continue + else: + continue + score = 1 + else: + score = float(linelist[6]) + + #if box_size > 7000: + #if box_size <= 7000 or box_size >= 15000: + #if box_size < 15000: + #continue + + tlwh = tuple(map(float, linelist[2:6])) + target_id = int(linelist[1]) + + results_dict[fid].append((tlwh, target_id, score)) + + return results_dict + + +def unzip_objs(objs): + if len(objs) > 0: + tlwhs, ids, scores = zip(*objs) + else: + tlwhs, ids, scores = [], [], [] + tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) + + return tlwhs, ids, scores \ No newline at end of file diff --git a/yolox/evaluators/mot_evaluator.py b/yolox/evaluators/mot_evaluator.py new file mode 100644 index 0000000000000000000000000000000000000000..becec47deadf7fd8345b477df9bac151bab7241d --- /dev/null +++ b/yolox/evaluators/mot_evaluator.py @@ -0,0 +1,679 @@ +from collections import defaultdict +from loguru import logger +from tqdm import tqdm + +import torch + +from yolox.utils import ( + gather, + is_main_process, + postprocess, + synchronize, + time_synchronized, + xyxy2xywh +) +from yolox.tracker.byte_tracker import BYTETracker +from yolox.sort_tracker.sort import Sort +from yolox.deepsort_tracker.deepsort import DeepSort +from yolox.motdt_tracker.motdt_tracker import OnlineTracker + +import contextlib +import io +import os +import itertools +import json +import tempfile +import time + + +def write_results(filename, results): + save_format = '{frame},{id},{x1},{y1},{w},{h},{s},-1,-1,-1\n' + with open(filename, 'w') as f: + for frame_id, tlwhs, track_ids, scores in results: + for tlwh, track_id, score in zip(tlwhs, track_ids, scores): + if track_id < 0: + continue + x1, y1, w, h = tlwh + line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1), h=round(h, 1), s=round(score, 2)) + f.write(line) + logger.info('save results to {}'.format(filename)) + + +def write_results_no_score(filename, results): + save_format = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\n' + with open(filename, 'w') as f: + for frame_id, tlwhs, track_ids in results: + for tlwh, track_id in zip(tlwhs, track_ids): + if track_id < 0: + continue + x1, y1, w, h = tlwh + line = save_format.format(frame=frame_id, id=track_id, x1=round(x1, 1), y1=round(y1, 1), w=round(w, 1), h=round(h, 1)) + f.write(line) + logger.info('save results to {}'.format(filename)) + + +class MOTEvaluator: + """ + COCO AP Evaluation class. All the data in the val2017 dataset are processed + and evaluated by COCO API. + """ + + def __init__( + self, args, dataloader, img_size, confthre, nmsthre, num_classes): + """ + Args: + dataloader (Dataloader): evaluate dataloader. + img_size (int): image size after preprocess. 
images are resized + to squares whose shape is (img_size, img_size). + confthre (float): confidence threshold ranging from 0 to 1, which + is defined in the config file. + nmsthre (float): IoU threshold of non-max supression ranging from 0 to 1. + """ + self.dataloader = dataloader + self.img_size = img_size + self.confthre = confthre + self.nmsthre = nmsthre + self.num_classes = num_classes + self.args = args + + def evaluate( + self, + model, + distributed=False, + half=False, + trt_file=None, + decoder=None, + test_size=None, + result_folder=None + ): + """ + COCO average precision (AP) Evaluation. Iterate inference on the test dataset + and the results are evaluated by COCO API. + + NOTE: This function will change training mode to False, please save states if needed. + + Args: + model : model to evaluate. + + Returns: + ap50_95 (float) : COCO AP of IoU=50:95 + ap50 (float) : COCO AP of IoU=50 + summary (sr): summary info of evaluation. + """ + # TODO half to amp_test + tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor + model = model.eval() + if half: + model = model.half() + ids = [] + data_list = [] + results = [] + video_names = defaultdict() + progress_bar = tqdm if is_main_process() else iter + + inference_time = 0 + track_time = 0 + n_samples = len(self.dataloader) - 1 + + if trt_file is not None: + from torch2trt import TRTModule + + model_trt = TRTModule() + model_trt.load_state_dict(torch.load(trt_file)) + + x = torch.ones(1, 3, test_size[0], test_size[1]).cuda() + model(x) + model = model_trt + + tracker = BYTETracker(self.args) + ori_thresh = self.args.track_thresh + for cur_iter, (imgs, _, info_imgs, ids) in enumerate( + progress_bar(self.dataloader) + ): + with torch.no_grad(): + # init tracker + frame_id = info_imgs[2].item() + video_id = info_imgs[3].item() + img_file_name = info_imgs[4] + video_name = img_file_name[0].split('/')[0] + if video_name == 'MOT17-05-FRCNN' or video_name == 'MOT17-06-FRCNN': + self.args.track_buffer = 14 + elif video_name == 'MOT17-13-FRCNN' or video_name == 'MOT17-14-FRCNN': + self.args.track_buffer = 25 + else: + self.args.track_buffer = 30 + + if video_name == 'MOT17-01-FRCNN': + self.args.track_thresh = 0.65 + elif video_name == 'MOT17-06-FRCNN': + self.args.track_thresh = 0.65 + elif video_name == 'MOT17-12-FRCNN': + self.args.track_thresh = 0.7 + elif video_name == 'MOT17-14-FRCNN': + self.args.track_thresh = 0.67 + else: + self.args.track_thresh = ori_thresh + + if video_name == 'MOT20-06' or video_name == 'MOT20-08': + self.args.track_thresh = 0.3 + else: + self.args.track_thresh = ori_thresh + + if video_name not in video_names: + video_names[video_id] = video_name + if frame_id == 1: + tracker = BYTETracker(self.args) + if len(results) != 0: + result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id - 1])) + write_results(result_filename, results) + results = [] + + imgs = imgs.type(tensor_type) + + # skip the the last iters since batchsize might be not enough for batch inference + is_time_record = cur_iter < len(self.dataloader) - 1 + if is_time_record: + start = time.time() + + outputs = model(imgs) + if decoder is not None: + outputs = decoder(outputs, dtype=outputs.type()) + + outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre) + + if is_time_record: + infer_end = time_synchronized() + inference_time += infer_end - start + + output_results = self.convert_to_coco_format(outputs, info_imgs, ids) + data_list.extend(output_results) + + # run tracking + if 
outputs[0] is not None: + online_targets = tracker.update(outputs[0], info_imgs, self.img_size) + online_tlwhs = [] + online_ids = [] + online_scores = [] + for t in online_targets: + tlwh = t.tlwh + tid = t.track_id + vertical = tlwh[2] / tlwh[3] > 1.6 + if tlwh[2] * tlwh[3] > self.args.min_box_area and not vertical: + online_tlwhs.append(tlwh) + online_ids.append(tid) + online_scores.append(t.score) + # save results + results.append((frame_id, online_tlwhs, online_ids, online_scores)) + + if is_time_record: + track_end = time_synchronized() + track_time += track_end - infer_end + + if cur_iter == len(self.dataloader) - 1: + result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id])) + write_results(result_filename, results) + + statistics = torch.cuda.FloatTensor([inference_time, track_time, n_samples]) + if distributed: + data_list = gather(data_list, dst=0) + data_list = list(itertools.chain(*data_list)) + torch.distributed.reduce(statistics, dst=0) + + eval_results = self.evaluate_prediction(data_list, statistics) + synchronize() + return eval_results + + def evaluate_sort( + self, + model, + distributed=False, + half=False, + trt_file=None, + decoder=None, + test_size=None, + result_folder=None + ): + """ + COCO average precision (AP) Evaluation. Iterate inference on the test dataset + and the results are evaluated by COCO API. + + NOTE: This function will change training mode to False, please save states if needed. + + Args: + model : model to evaluate. + + Returns: + ap50_95 (float) : COCO AP of IoU=50:95 + ap50 (float) : COCO AP of IoU=50 + summary (sr): summary info of evaluation. + """ + # TODO half to amp_test + tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor + model = model.eval() + if half: + model = model.half() + ids = [] + data_list = [] + results = [] + video_names = defaultdict() + progress_bar = tqdm if is_main_process() else iter + + inference_time = 0 + track_time = 0 + n_samples = len(self.dataloader) - 1 + + if trt_file is not None: + from torch2trt import TRTModule + + model_trt = TRTModule() + model_trt.load_state_dict(torch.load(trt_file)) + + x = torch.ones(1, 3, test_size[0], test_size[1]).cuda() + model(x) + model = model_trt + + tracker = Sort(self.args.track_thresh) + + for cur_iter, (imgs, _, info_imgs, ids) in enumerate( + progress_bar(self.dataloader) + ): + with torch.no_grad(): + # init tracker + frame_id = info_imgs[2].item() + video_id = info_imgs[3].item() + img_file_name = info_imgs[4] + video_name = img_file_name[0].split('/')[0] + + if video_name not in video_names: + video_names[video_id] = video_name + if frame_id == 1: + tracker = Sort(self.args.track_thresh) + if len(results) != 0: + result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id - 1])) + write_results_no_score(result_filename, results) + results = [] + + imgs = imgs.type(tensor_type) + + # skip the the last iters since batchsize might be not enough for batch inference + is_time_record = cur_iter < len(self.dataloader) - 1 + if is_time_record: + start = time.time() + + outputs = model(imgs) + if decoder is not None: + outputs = decoder(outputs, dtype=outputs.type()) + + outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre) + + if is_time_record: + infer_end = time_synchronized() + inference_time += infer_end - start + + output_results = self.convert_to_coco_format(outputs, info_imgs, ids) + data_list.extend(output_results) + + # run tracking + online_targets = 
tracker.update(outputs[0], info_imgs, self.img_size) + online_tlwhs = [] + online_ids = [] + for t in online_targets: + tlwh = [t[0], t[1], t[2] - t[0], t[3] - t[1]] + tid = t[4] + vertical = tlwh[2] / tlwh[3] > 1.6 + if tlwh[2] * tlwh[3] > self.args.min_box_area and not vertical: + online_tlwhs.append(tlwh) + online_ids.append(tid) + # save results + results.append((frame_id, online_tlwhs, online_ids)) + + if is_time_record: + track_end = time_synchronized() + track_time += track_end - infer_end + + if cur_iter == len(self.dataloader) - 1: + result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id])) + write_results_no_score(result_filename, results) + + statistics = torch.cuda.FloatTensor([inference_time, track_time, n_samples]) + if distributed: + data_list = gather(data_list, dst=0) + data_list = list(itertools.chain(*data_list)) + torch.distributed.reduce(statistics, dst=0) + + eval_results = self.evaluate_prediction(data_list, statistics) + synchronize() + return eval_results + + def evaluate_deepsort( + self, + model, + distributed=False, + half=False, + trt_file=None, + decoder=None, + test_size=None, + result_folder=None, + model_folder=None + ): + """ + COCO average precision (AP) Evaluation. Iterate inference on the test dataset + and the results are evaluated by COCO API. + + NOTE: This function will change training mode to False, please save states if needed. + + Args: + model : model to evaluate. + + Returns: + ap50_95 (float) : COCO AP of IoU=50:95 + ap50 (float) : COCO AP of IoU=50 + summary (sr): summary info of evaluation. + """ + # TODO half to amp_test + tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor + model = model.eval() + if half: + model = model.half() + ids = [] + data_list = [] + results = [] + video_names = defaultdict() + progress_bar = tqdm if is_main_process() else iter + + inference_time = 0 + track_time = 0 + n_samples = len(self.dataloader) - 1 + + if trt_file is not None: + from torch2trt import TRTModule + + model_trt = TRTModule() + model_trt.load_state_dict(torch.load(trt_file)) + + x = torch.ones(1, 3, test_size[0], test_size[1]).cuda() + model(x) + model = model_trt + + tracker = DeepSort(model_folder, min_confidence=self.args.track_thresh) + + for cur_iter, (imgs, _, info_imgs, ids) in enumerate( + progress_bar(self.dataloader) + ): + with torch.no_grad(): + # init tracker + frame_id = info_imgs[2].item() + video_id = info_imgs[3].item() + img_file_name = info_imgs[4] + video_name = img_file_name[0].split('/')[0] + + if video_name not in video_names: + video_names[video_id] = video_name + if frame_id == 1: + tracker = DeepSort(model_folder, min_confidence=self.args.track_thresh) + if len(results) != 0: + result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id - 1])) + write_results_no_score(result_filename, results) + results = [] + + imgs = imgs.type(tensor_type) + + # skip the the last iters since batchsize might be not enough for batch inference + is_time_record = cur_iter < len(self.dataloader) - 1 + if is_time_record: + start = time.time() + + outputs = model(imgs) + if decoder is not None: + outputs = decoder(outputs, dtype=outputs.type()) + + outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre) + + if is_time_record: + infer_end = time_synchronized() + inference_time += infer_end - start + + output_results = self.convert_to_coco_format(outputs, info_imgs, ids) + data_list.extend(output_results) + + # run tracking + online_targets = 
tracker.update(outputs[0], info_imgs, self.img_size, img_file_name[0]) + online_tlwhs = [] + online_ids = [] + for t in online_targets: + tlwh = [t[0], t[1], t[2] - t[0], t[3] - t[1]] + tid = t[4] + vertical = tlwh[2] / tlwh[3] > 1.6 + if tlwh[2] * tlwh[3] > self.args.min_box_area and not vertical: + online_tlwhs.append(tlwh) + online_ids.append(tid) + # save results + results.append((frame_id, online_tlwhs, online_ids)) + + if is_time_record: + track_end = time_synchronized() + track_time += track_end - infer_end + + if cur_iter == len(self.dataloader) - 1: + result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id])) + write_results_no_score(result_filename, results) + + statistics = torch.cuda.FloatTensor([inference_time, track_time, n_samples]) + if distributed: + data_list = gather(data_list, dst=0) + data_list = list(itertools.chain(*data_list)) + torch.distributed.reduce(statistics, dst=0) + + eval_results = self.evaluate_prediction(data_list, statistics) + synchronize() + return eval_results + + def evaluate_motdt( + self, + model, + distributed=False, + half=False, + trt_file=None, + decoder=None, + test_size=None, + result_folder=None, + model_folder=None + ): + """ + COCO average precision (AP) Evaluation. Iterate inference on the test dataset + and the results are evaluated by COCO API. + + NOTE: This function will change training mode to False, please save states if needed. + + Args: + model : model to evaluate. + + Returns: + ap50_95 (float) : COCO AP of IoU=50:95 + ap50 (float) : COCO AP of IoU=50 + summary (sr): summary info of evaluation. + """ + # TODO half to amp_test + tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor + model = model.eval() + if half: + model = model.half() + ids = [] + data_list = [] + results = [] + video_names = defaultdict() + progress_bar = tqdm if is_main_process() else iter + + inference_time = 0 + track_time = 0 + n_samples = len(self.dataloader) - 1 + + if trt_file is not None: + from torch2trt import TRTModule + + model_trt = TRTModule() + model_trt.load_state_dict(torch.load(trt_file)) + + x = torch.ones(1, 3, test_size[0], test_size[1]).cuda() + model(x) + model = model_trt + + tracker = OnlineTracker(model_folder, min_cls_score=self.args.track_thresh) + for cur_iter, (imgs, _, info_imgs, ids) in enumerate( + progress_bar(self.dataloader) + ): + with torch.no_grad(): + # init tracker + frame_id = info_imgs[2].item() + video_id = info_imgs[3].item() + img_file_name = info_imgs[4] + video_name = img_file_name[0].split('/')[0] + + if video_name not in video_names: + video_names[video_id] = video_name + if frame_id == 1: + tracker = OnlineTracker(model_folder, min_cls_score=self.args.track_thresh) + if len(results) != 0: + result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id - 1])) + write_results(result_filename, results) + results = [] + + imgs = imgs.type(tensor_type) + + # skip the the last iters since batchsize might be not enough for batch inference + is_time_record = cur_iter < len(self.dataloader) - 1 + if is_time_record: + start = time.time() + + outputs = model(imgs) + if decoder is not None: + outputs = decoder(outputs, dtype=outputs.type()) + + outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre) + + if is_time_record: + infer_end = time_synchronized() + inference_time += infer_end - start + + output_results = self.convert_to_coco_format(outputs, info_imgs, ids) + data_list.extend(output_results) + + # run tracking + online_targets 
= tracker.update(outputs[0], info_imgs, self.img_size, img_file_name[0]) + online_tlwhs = [] + online_ids = [] + online_scores = [] + for t in online_targets: + tlwh = t.tlwh + tid = t.track_id + vertical = tlwh[2] / tlwh[3] > 1.6 + if tlwh[2] * tlwh[3] > self.args.min_box_area and not vertical: + online_tlwhs.append(tlwh) + online_ids.append(tid) + online_scores.append(t.score) + # save results + results.append((frame_id, online_tlwhs, online_ids, online_scores)) + + if is_time_record: + track_end = time_synchronized() + track_time += track_end - infer_end + + if cur_iter == len(self.dataloader) - 1: + result_filename = os.path.join(result_folder, '{}.txt'.format(video_names[video_id])) + write_results(result_filename, results) + + statistics = torch.cuda.FloatTensor([inference_time, track_time, n_samples]) + if distributed: + data_list = gather(data_list, dst=0) + data_list = list(itertools.chain(*data_list)) + torch.distributed.reduce(statistics, dst=0) + + eval_results = self.evaluate_prediction(data_list, statistics) + synchronize() + return eval_results + + def convert_to_coco_format(self, outputs, info_imgs, ids): + data_list = [] + for (output, img_h, img_w, img_id) in zip( + outputs, info_imgs[0], info_imgs[1], ids + ): + if output is None: + continue + output = output.cpu() + + bboxes = output[:, 0:4] + + # preprocessing: resize + scale = min( + self.img_size[0] / float(img_h), self.img_size[1] / float(img_w) + ) + bboxes /= scale + bboxes = xyxy2xywh(bboxes) + + cls = output[:, 6] + scores = output[:, 4] * output[:, 5] + for ind in range(bboxes.shape[0]): + label = self.dataloader.dataset.class_ids[int(cls[ind])] + pred_data = { + "image_id": int(img_id), + "category_id": label, + "bbox": bboxes[ind].numpy().tolist(), + "score": scores[ind].numpy().item(), + "segmentation": [], + } # COCO json format + data_list.append(pred_data) + return data_list + + def evaluate_prediction(self, data_dict, statistics): + if not is_main_process(): + return 0, 0, None + + logger.info("Evaluate in main process...") + + annType = ["segm", "bbox", "keypoints"] + + inference_time = statistics[0].item() + track_time = statistics[1].item() + n_samples = statistics[2].item() + + a_infer_time = 1000 * inference_time / (n_samples * self.dataloader.batch_size) + a_track_time = 1000 * track_time / (n_samples * self.dataloader.batch_size) + + time_info = ", ".join( + [ + "Average {} time: {:.2f} ms".format(k, v) + for k, v in zip( + ["forward", "track", "inference"], + [a_infer_time, a_track_time, (a_infer_time + a_track_time)], + ) + ] + ) + + info = time_info + "\n" + + # Evaluate the Dt (detection) json comparing with the ground truth + if len(data_dict) > 0: + cocoGt = self.dataloader.dataset.coco + # TODO: since pycocotools can't process dict in py36, write data to json file. 
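                # Aside (not from the original sources): pycocotools' COCO.loadRes accepts either a
                # results-file path or an in-memory list of dicts; round-tripping the detections
                # through a temporary JSON file, as done below, sidesteps the py3.6 dict issue noted
                # above and stays robust across pycocotools versions.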
+ _, tmp = tempfile.mkstemp() + json.dump(data_dict, open(tmp, "w")) + cocoDt = cocoGt.loadRes(tmp) + ''' + try: + from yolox.layers import COCOeval_opt as COCOeval + except ImportError: + from pycocotools import cocoeval as COCOeval + logger.warning("Use standard COCOeval.") + ''' + #from pycocotools.cocoeval import COCOeval + from yolox.layers import COCOeval_opt as COCOeval + cocoEval = COCOeval(cocoGt, cocoDt, annType[1]) + cocoEval.evaluate() + cocoEval.accumulate() + redirect_string = io.StringIO() + with contextlib.redirect_stdout(redirect_string): + cocoEval.summarize() + info += redirect_string.getvalue() + return cocoEval.stats[0], cocoEval.stats[1], info + else: + return 0, 0, info diff --git a/yolox/exp/__init__.py b/yolox/exp/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..951195cb905195145ac10a6b9aefd84f9d9c3b03 --- /dev/null +++ b/yolox/exp/__init__.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + +from .base_exp import BaseExp +from .build import get_exp +from .yolox_base import Exp diff --git a/yolox/exp/base_exp.py b/yolox/exp/base_exp.py new file mode 100644 index 0000000000000000000000000000000000000000..c4aed89f55ad742cf32d1ba6c7bd7363609a222e --- /dev/null +++ b/yolox/exp/base_exp.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + +import torch +from torch.nn import Module + +from yolox.utils import LRScheduler + +import ast +import pprint +from abc import ABCMeta, abstractmethod +from tabulate import tabulate +from typing import Dict + + +class BaseExp(metaclass=ABCMeta): + """Basic class for any experiment.""" + + def __init__(self): + self.seed = None + self.output_dir = "./YOLOX_outputs" + self.print_interval = 100 + self.eval_interval = 10 + + @abstractmethod + def get_model(self) -> Module: + pass + + @abstractmethod + def get_data_loader( + self, batch_size: int, is_distributed: bool + ) -> Dict[str, torch.utils.data.DataLoader]: + pass + + @abstractmethod + def get_optimizer(self, batch_size: int) -> torch.optim.Optimizer: + pass + + @abstractmethod + def get_lr_scheduler( + self, lr: float, iters_per_epoch: int, **kwargs + ) -> LRScheduler: + pass + + @abstractmethod + def get_evaluator(self): + pass + + @abstractmethod + def eval(self, model, evaluator, weights): + pass + + def __repr__(self): + table_header = ["keys", "values"] + exp_table = [ + (str(k), pprint.pformat(v)) + for k, v in vars(self).items() + if not k.startswith("_") + ] + return tabulate(exp_table, headers=table_header, tablefmt="fancy_grid") + + def merge(self, cfg_list): + assert len(cfg_list) % 2 == 0 + for k, v in zip(cfg_list[0::2], cfg_list[1::2]): + # only update value with same key + if hasattr(self, k): + src_value = getattr(self, k) + src_type = type(src_value) + if src_value is not None and src_type != type(v): + try: + v = src_type(v) + except Exception: + v = ast.literal_eval(v) + setattr(self, k, v) diff --git a/yolox/exp/build.py b/yolox/exp/build.py new file mode 100644 index 0000000000000000000000000000000000000000..411b09cdc6b65dd4f9e4bcec1a54dc00bdfe347b --- /dev/null +++ b/yolox/exp/build.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
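For reference, a minimal standalone sketch of the same load-results, evaluate, accumulate, summarize flow using plain pycocotools (the annotation and detection file paths are illustrative assumptions); cocoEval.stats[0] and stats[1] are AP@[.50:.95] and AP@.50, which is what evaluate_prediction() returns:

from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

coco_gt = COCO("annotations/instances_val2017.json")   # illustrative path
coco_dt = coco_gt.loadRes("yolox_detections.json")     # same per-detection dict format as data_dict
coco_eval = COCOeval(coco_gt, coco_dt, "bbox")
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
ap50_95, ap50 = coco_eval.stats[0], coco_eval.stats[1]  # AP@[.50:.95], AP@.50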
+ +import importlib +import os +import sys + + +def get_exp_by_file(exp_file): + try: + sys.path.append(os.path.dirname(exp_file)) + current_exp = importlib.import_module(os.path.basename(exp_file).split(".")[0]) + exp = current_exp.Exp() + except Exception: + raise ImportError("{} doesn't contains class named 'Exp'".format(exp_file)) + return exp + + +def get_exp_by_name(exp_name): + import yolox + + yolox_path = os.path.dirname(os.path.dirname(yolox.__file__)) + filedict = { + "yolox-s": "yolox_s.py", + "yolox-m": "yolox_m.py", + "yolox-l": "yolox_l.py", + "yolox-x": "yolox_x.py", + "yolox-tiny": "yolox_tiny.py", + "yolox-nano": "nano.py", + "yolov3": "yolov3.py", + } + filename = filedict[exp_name] + exp_path = os.path.join(yolox_path, "exps", "default", filename) + return get_exp_by_file(exp_path) + + +def get_exp(exp_file, exp_name): + """ + get Exp object by file or name. If exp_file and exp_name + are both provided, get Exp by exp_file. + + Args: + exp_file (str): file path of experiment. + exp_name (str): name of experiment. "yolo-s", + """ + assert ( + exp_file is not None or exp_name is not None + ), "plz provide exp file or exp name." + if exp_file is not None: + return get_exp_by_file(exp_file) + else: + return get_exp_by_name(exp_name) diff --git a/yolox/exp/yolox_base.py b/yolox/exp/yolox_base.py new file mode 100644 index 0000000000000000000000000000000000000000..a1b1b03f6e79a246bea5644b030149b70dab81a6 --- /dev/null +++ b/yolox/exp/yolox_base.py @@ -0,0 +1,248 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + +import torch +import torch.distributed as dist +import torch.nn as nn + +import os +import random + +from .base_exp import BaseExp + + +class Exp(BaseExp): + def __init__(self): + super().__init__() + + # ---------------- model config ---------------- # + self.num_classes = 80 + self.depth = 1.00 + self.width = 1.00 + + # ---------------- dataloader config ---------------- # + # set worker to 4 for shorter dataloader init time + self.data_num_workers = 4 + self.input_size = (640, 640) + self.random_size = (14, 26) + self.train_ann = "instances_train2017.json" + self.val_ann = "instances_val2017.json" + + # --------------- transform config ----------------- # + self.degrees = 10.0 + self.translate = 0.1 + self.scale = (0.1, 2) + self.mscale = (0.8, 1.6) + self.shear = 2.0 + self.perspective = 0.0 + self.enable_mixup = True + + # -------------- training config --------------------- # + self.warmup_epochs = 5 + self.max_epoch = 300 + self.warmup_lr = 0 + self.basic_lr_per_img = 0.01 / 64.0 + self.scheduler = "yoloxwarmcos" + self.no_aug_epochs = 15 + self.min_lr_ratio = 0.05 + self.ema = True + + self.weight_decay = 5e-4 + self.momentum = 0.9 + self.print_interval = 10 + self.eval_interval = 10 + self.exp_name = os.path.split(os.path.realpath(__file__))[1].split(".")[0] + + # ----------------- testing config ------------------ # + self.test_size = (640, 640) + self.test_conf = 0.001 + self.nmsthre = 0.65 + + def get_model(self): + from yolox.models import YOLOPAFPN, YOLOX, YOLOXHead + + def init_yolo(M): + for m in M.modules(): + if isinstance(m, nn.BatchNorm2d): + m.eps = 1e-3 + m.momentum = 0.03 + + if getattr(self, "model", None) is None: + in_channels = [256, 512, 1024] + backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels) + head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels) + self.model = YOLOX(backbone, head) + + self.model.apply(init_yolo) + 
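As an aside on the experiment plumbing defined above, a small hypothetical usage sketch (the override keys and exp file path are illustrative): get_exp() resolves either an exp file or one of the built-in names, and merge() re-casts string overrides to the type of the existing attribute.

from yolox.exp import get_exp

exp = get_exp(None, "yolox-x")                 # by built-in name -> exps/default/yolox_x.py
# exp = get_exp("exps/my_exp.py", None)        # or by file; the file must define a class named Exp
exp.merge(["num_classes", "1", "max_epoch", "80"])   # strings re-cast via the existing attribute types
model = exp.get_model()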
self.model.head.initialize_biases(1e-2) + return self.model + + def get_data_loader(self, batch_size, is_distributed, no_aug=False): + from yolox.data import ( + COCODataset, + DataLoader, + InfiniteSampler, + MosaicDetection, + TrainTransform, + YoloBatchSampler + ) + + dataset = COCODataset( + data_dir=None, + json_file=self.train_ann, + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=50, + ), + ) + + dataset = MosaicDetection( + dataset, + mosaic=not no_aug, + img_size=self.input_size, + preproc=TrainTransform( + rgb_means=(0.485, 0.456, 0.406), + std=(0.229, 0.224, 0.225), + max_labels=120, + ), + degrees=self.degrees, + translate=self.translate, + scale=self.scale, + shear=self.shear, + perspective=self.perspective, + enable_mixup=self.enable_mixup, + ) + + self.dataset = dataset + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + + sampler = InfiniteSampler(len(self.dataset), seed=self.seed if self.seed else 0) + + batch_sampler = YoloBatchSampler( + sampler=sampler, + batch_size=batch_size, + drop_last=False, + input_dimension=self.input_size, + mosaic=not no_aug, + ) + + dataloader_kwargs = {"num_workers": self.data_num_workers, "pin_memory": True} + dataloader_kwargs["batch_sampler"] = batch_sampler + train_loader = DataLoader(self.dataset, **dataloader_kwargs) + + return train_loader + + def random_resize(self, data_loader, epoch, rank, is_distributed): + tensor = torch.LongTensor(2).cuda() + + if rank == 0: + size_factor = self.input_size[1] * 1.0 / self.input_size[0] + size = random.randint(*self.random_size) + size = (int(32 * size), 32 * int(size * size_factor)) + tensor[0] = size[0] + tensor[1] = size[1] + + if is_distributed: + dist.barrier() + dist.broadcast(tensor, 0) + + input_size = data_loader.change_input_dim( + multiple=(tensor[0].item(), tensor[1].item()), random_range=None + ) + return input_size + + def get_optimizer(self, batch_size): + if "optimizer" not in self.__dict__: + if self.warmup_epochs > 0: + lr = self.warmup_lr + else: + lr = self.basic_lr_per_img * batch_size + + pg0, pg1, pg2 = [], [], [] # optimizer parameter groups + + for k, v in self.model.named_modules(): + if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter): + pg2.append(v.bias) # biases + if isinstance(v, nn.BatchNorm2d) or "bn" in k: + pg0.append(v.weight) # no decay + elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter): + pg1.append(v.weight) # apply decay + + optimizer = torch.optim.SGD( + pg0, lr=lr, momentum=self.momentum, nesterov=True + ) + optimizer.add_param_group( + {"params": pg1, "weight_decay": self.weight_decay} + ) # add pg1 with weight_decay + optimizer.add_param_group({"params": pg2}) + self.optimizer = optimizer + + return self.optimizer + + def get_lr_scheduler(self, lr, iters_per_epoch): + from yolox.utils import LRScheduler + + scheduler = LRScheduler( + self.scheduler, + lr, + iters_per_epoch, + self.max_epoch, + warmup_epochs=self.warmup_epochs, + warmup_lr_start=self.warmup_lr, + no_aug_epochs=self.no_aug_epochs, + min_lr_ratio=self.min_lr_ratio, + ) + return scheduler + + def get_eval_loader(self, batch_size, is_distributed, testdev=False): + from yolox.data import COCODataset, ValTransform + + valdataset = COCODataset( + data_dir=None, + json_file=self.val_ann if not testdev else "image_info_test-dev2017.json", + name="val2017" if not testdev else "test2017", + img_size=self.test_size, + preproc=ValTransform( + rgb_means=(0.485, 0.456, 0.406), 
std=(0.229, 0.224, 0.225) + ), + ) + + if is_distributed: + batch_size = batch_size // dist.get_world_size() + sampler = torch.utils.data.distributed.DistributedSampler( + valdataset, shuffle=False + ) + else: + sampler = torch.utils.data.SequentialSampler(valdataset) + + dataloader_kwargs = { + "num_workers": self.data_num_workers, + "pin_memory": True, + "sampler": sampler, + } + dataloader_kwargs["batch_size"] = batch_size + val_loader = torch.utils.data.DataLoader(valdataset, **dataloader_kwargs) + + return val_loader + + def get_evaluator(self, batch_size, is_distributed, testdev=False): + from yolox.evaluators import COCOEvaluator + + val_loader = self.get_eval_loader(batch_size, is_distributed, testdev=testdev) + evaluator = COCOEvaluator( + dataloader=val_loader, + img_size=self.test_size, + confthre=self.test_conf, + nmsthre=self.nmsthre, + num_classes=self.num_classes, + testdev=testdev, + ) + return evaluator + + def eval(self, model, evaluator, is_distributed, half=False): + return evaluator.evaluate(model, is_distributed, half) diff --git a/yolox/layers/__init__.py b/yolox/layers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..833947c3dfb9b8e50a7b5d478628bb681afb25bb --- /dev/null +++ b/yolox/layers/__init__.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + +from .fast_coco_eval_api import COCOeval_opt diff --git a/yolox/layers/csrc/cocoeval/cocoeval.cpp b/yolox/layers/csrc/cocoeval/cocoeval.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2e63bc9952918060f55999ec100b283d83616b46 --- /dev/null +++ b/yolox/layers/csrc/cocoeval/cocoeval.cpp @@ -0,0 +1,502 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +#include "cocoeval.h" +#include +#include +#include +#include + +using namespace pybind11::literals; + +namespace COCOeval { + +// Sort detections from highest score to lowest, such that +// detection_instances[detection_sorted_indices[t]] >= +// detection_instances[detection_sorted_indices[t+1]]. 
Use stable_sort to match +// original COCO API +void SortInstancesByDetectionScore( + const std::vector& detection_instances, + std::vector* detection_sorted_indices) { + detection_sorted_indices->resize(detection_instances.size()); + std::iota( + detection_sorted_indices->begin(), detection_sorted_indices->end(), 0); + std::stable_sort( + detection_sorted_indices->begin(), + detection_sorted_indices->end(), + [&detection_instances](size_t j1, size_t j2) { + return detection_instances[j1].score > detection_instances[j2].score; + }); +} + +// Partition the ground truth objects based on whether or not to ignore them +// based on area +void SortInstancesByIgnore( + const std::array& area_range, + const std::vector& ground_truth_instances, + std::vector* ground_truth_sorted_indices, + std::vector* ignores) { + ignores->clear(); + ignores->reserve(ground_truth_instances.size()); + for (auto o : ground_truth_instances) { + ignores->push_back( + o.ignore || o.area < area_range[0] || o.area > area_range[1]); + } + + ground_truth_sorted_indices->resize(ground_truth_instances.size()); + std::iota( + ground_truth_sorted_indices->begin(), + ground_truth_sorted_indices->end(), + 0); + std::stable_sort( + ground_truth_sorted_indices->begin(), + ground_truth_sorted_indices->end(), + [&ignores](size_t j1, size_t j2) { + return (int)(*ignores)[j1] < (int)(*ignores)[j2]; + }); +} + +// For each IOU threshold, greedily match each detected instance to a ground +// truth instance (if possible) and store the results +void MatchDetectionsToGroundTruth( + const std::vector& detection_instances, + const std::vector& detection_sorted_indices, + const std::vector& ground_truth_instances, + const std::vector& ground_truth_sorted_indices, + const std::vector& ignores, + const std::vector>& ious, + const std::vector& iou_thresholds, + const std::array& area_range, + ImageEvaluation* results) { + // Initialize memory to store return data matches and ignore + const int num_iou_thresholds = iou_thresholds.size(); + const int num_ground_truth = ground_truth_sorted_indices.size(); + const int num_detections = detection_sorted_indices.size(); + std::vector ground_truth_matches( + num_iou_thresholds * num_ground_truth, 0); + std::vector& detection_matches = results->detection_matches; + std::vector& detection_ignores = results->detection_ignores; + std::vector& ground_truth_ignores = results->ground_truth_ignores; + detection_matches.resize(num_iou_thresholds * num_detections, 0); + detection_ignores.resize(num_iou_thresholds * num_detections, false); + ground_truth_ignores.resize(num_ground_truth); + for (auto g = 0; g < num_ground_truth; ++g) { + ground_truth_ignores[g] = ignores[ground_truth_sorted_indices[g]]; + } + + for (auto t = 0; t < num_iou_thresholds; ++t) { + for (auto d = 0; d < num_detections; ++d) { + // information about best match so far (match=-1 -> unmatched) + double best_iou = std::min(iou_thresholds[t], 1 - 1e-10); + int match = -1; + for (auto g = 0; g < num_ground_truth; ++g) { + // if this ground truth instance is already matched and not a + // crowd, it cannot be matched to another detection + if (ground_truth_matches[t * num_ground_truth + g] > 0 && + !ground_truth_instances[ground_truth_sorted_indices[g]].is_crowd) { + continue; + } + + // if detected instance matched to a regular ground truth + // instance, we can break on the first ground truth instance + // tagged as ignore (because they are sorted by the ignore tag) + if (match >= 0 && !ground_truth_ignores[match] && + ground_truth_ignores[g]) 
{ + break; + } + + // if IOU overlap is the best so far, store the match appropriately + if (ious[d][ground_truth_sorted_indices[g]] >= best_iou) { + best_iou = ious[d][ground_truth_sorted_indices[g]]; + match = g; + } + } + // if match was made, store id of match for both detection and + // ground truth + if (match >= 0) { + detection_ignores[t * num_detections + d] = ground_truth_ignores[match]; + detection_matches[t * num_detections + d] = + ground_truth_instances[ground_truth_sorted_indices[match]].id; + ground_truth_matches[t * num_ground_truth + match] = + detection_instances[detection_sorted_indices[d]].id; + } + + // set unmatched detections outside of area range to ignore + const InstanceAnnotation& detection = + detection_instances[detection_sorted_indices[d]]; + detection_ignores[t * num_detections + d] = + detection_ignores[t * num_detections + d] || + (detection_matches[t * num_detections + d] == 0 && + (detection.area < area_range[0] || detection.area > area_range[1])); + } + } + + // store detection score results + results->detection_scores.resize(detection_sorted_indices.size()); + for (size_t d = 0; d < detection_sorted_indices.size(); ++d) { + results->detection_scores[d] = + detection_instances[detection_sorted_indices[d]].score; + } +} + +std::vector EvaluateImages( + const std::vector>& area_ranges, + int max_detections, + const std::vector& iou_thresholds, + const ImageCategoryInstances>& image_category_ious, + const ImageCategoryInstances& + image_category_ground_truth_instances, + const ImageCategoryInstances& + image_category_detection_instances) { + const int num_area_ranges = area_ranges.size(); + const int num_images = image_category_ground_truth_instances.size(); + const int num_categories = + image_category_ious.size() > 0 ? image_category_ious[0].size() : 0; + std::vector detection_sorted_indices; + std::vector ground_truth_sorted_indices; + std::vector ignores; + std::vector results_all( + num_images * num_area_ranges * num_categories); + + // Store results for each image, category, and area range combination. 
Results + // for each IOU threshold are packed into the same ImageEvaluation object + for (auto i = 0; i < num_images; ++i) { + for (auto c = 0; c < num_categories; ++c) { + const std::vector& ground_truth_instances = + image_category_ground_truth_instances[i][c]; + const std::vector& detection_instances = + image_category_detection_instances[i][c]; + + SortInstancesByDetectionScore( + detection_instances, &detection_sorted_indices); + if ((int)detection_sorted_indices.size() > max_detections) { + detection_sorted_indices.resize(max_detections); + } + + for (size_t a = 0; a < area_ranges.size(); ++a) { + SortInstancesByIgnore( + area_ranges[a], + ground_truth_instances, + &ground_truth_sorted_indices, + &ignores); + + MatchDetectionsToGroundTruth( + detection_instances, + detection_sorted_indices, + ground_truth_instances, + ground_truth_sorted_indices, + ignores, + image_category_ious[i][c], + iou_thresholds, + area_ranges[a], + &results_all + [c * num_area_ranges * num_images + a * num_images + i]); + } + } + } + + return results_all; +} + +// Convert a python list to a vector +template +std::vector list_to_vec(const py::list& l) { + std::vector v(py::len(l)); + for (int i = 0; i < (int)py::len(l); ++i) { + v[i] = l[i].cast(); + } + return v; +} + +// Helper function to Accumulate() +// Considers the evaluation results applicable to a particular category, area +// range, and max_detections parameter setting, which begin at +// evaluations[evaluation_index]. Extracts a sorted list of length n of all +// applicable detection instances concatenated across all images in the dataset, +// which are represented by the outputs evaluation_indices, detection_scores, +// image_detection_indices, and detection_sorted_indices--all of which are +// length n. evaluation_indices[i] stores the applicable index into +// evaluations[] for instance i, which has detection score detection_score[i], +// and is the image_detection_indices[i]'th of the list of detections +// for the image containing i. 
detection_sorted_indices[] defines a sorted +// permutation of the 3 other outputs +int BuildSortedDetectionList( + const std::vector& evaluations, + const int64_t evaluation_index, + const int64_t num_images, + const int max_detections, + std::vector* evaluation_indices, + std::vector* detection_scores, + std::vector* detection_sorted_indices, + std::vector* image_detection_indices) { + assert(evaluations.size() >= evaluation_index + num_images); + + // Extract a list of object instances of the applicable category, area + // range, and max detections requirements such that they can be sorted + image_detection_indices->clear(); + evaluation_indices->clear(); + detection_scores->clear(); + image_detection_indices->reserve(num_images * max_detections); + evaluation_indices->reserve(num_images * max_detections); + detection_scores->reserve(num_images * max_detections); + int num_valid_ground_truth = 0; + for (auto i = 0; i < num_images; ++i) { + const ImageEvaluation& evaluation = evaluations[evaluation_index + i]; + + for (int d = 0; + d < (int)evaluation.detection_scores.size() && d < max_detections; + ++d) { // detected instances + evaluation_indices->push_back(evaluation_index + i); + image_detection_indices->push_back(d); + detection_scores->push_back(evaluation.detection_scores[d]); + } + for (auto ground_truth_ignore : evaluation.ground_truth_ignores) { + if (!ground_truth_ignore) { + ++num_valid_ground_truth; + } + } + } + + // Sort detections by decreasing score, using stable sort to match + // python implementation + detection_sorted_indices->resize(detection_scores->size()); + std::iota( + detection_sorted_indices->begin(), detection_sorted_indices->end(), 0); + std::stable_sort( + detection_sorted_indices->begin(), + detection_sorted_indices->end(), + [&detection_scores](size_t j1, size_t j2) { + return (*detection_scores)[j1] > (*detection_scores)[j2]; + }); + + return num_valid_ground_truth; +} + +// Helper function to Accumulate() +// Compute a precision recall curve given a sorted list of detected instances +// encoded in evaluations, evaluation_indices, detection_scores, +// detection_sorted_indices, image_detection_indices (see +// BuildSortedDetectionList()). Using vectors precisions and recalls +// and temporary storage, output the results into precisions_out, recalls_out, +// and scores_out, which are large buffers containing many precion/recall curves +// for all possible parameter settings, with precisions_out_index and +// recalls_out_index defining the applicable indices to store results. 
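// Worked micro-example (illustrative): with num_valid_ground_truth = 2 and three
// detections in score order whose outcomes are [TP, FP, TP], the running sums give
//   recalls    = [0.5, 0.5, 1.0]
//   precisions = [1.0, 0.5, 2/3]
// The backwards pass below makes precisions non-increasing, giving [1.0, 2/3, 2/3],
// and sampling at a recall threshold r picks the precision/score at the first index
// whose recall is >= r (or stores 0 if no detection reaches that recall).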
+void ComputePrecisionRecallCurve( + const int64_t precisions_out_index, + const int64_t precisions_out_stride, + const int64_t recalls_out_index, + const std::vector& recall_thresholds, + const int iou_threshold_index, + const int num_iou_thresholds, + const int num_valid_ground_truth, + const std::vector& evaluations, + const std::vector& evaluation_indices, + const std::vector& detection_scores, + const std::vector& detection_sorted_indices, + const std::vector& image_detection_indices, + std::vector* precisions, + std::vector* recalls, + std::vector* precisions_out, + std::vector* scores_out, + std::vector* recalls_out) { + assert(recalls_out->size() > recalls_out_index); + + // Compute precision/recall for each instance in the sorted list of detections + int64_t true_positives_sum = 0, false_positives_sum = 0; + precisions->clear(); + recalls->clear(); + precisions->reserve(detection_sorted_indices.size()); + recalls->reserve(detection_sorted_indices.size()); + assert(!evaluations.empty() || detection_sorted_indices.empty()); + for (auto detection_sorted_index : detection_sorted_indices) { + const ImageEvaluation& evaluation = + evaluations[evaluation_indices[detection_sorted_index]]; + const auto num_detections = + evaluation.detection_matches.size() / num_iou_thresholds; + const auto detection_index = iou_threshold_index * num_detections + + image_detection_indices[detection_sorted_index]; + assert(evaluation.detection_matches.size() > detection_index); + assert(evaluation.detection_ignores.size() > detection_index); + const int64_t detection_match = + evaluation.detection_matches[detection_index]; + const bool detection_ignores = + evaluation.detection_ignores[detection_index]; + const auto true_positive = detection_match > 0 && !detection_ignores; + const auto false_positive = detection_match == 0 && !detection_ignores; + if (true_positive) { + ++true_positives_sum; + } + if (false_positive) { + ++false_positives_sum; + } + + const double recall = + static_cast(true_positives_sum) / num_valid_ground_truth; + recalls->push_back(recall); + const int64_t num_valid_detections = + true_positives_sum + false_positives_sum; + const double precision = num_valid_detections > 0 + ? static_cast(true_positives_sum) / num_valid_detections + : 0.0; + precisions->push_back(precision); + } + + (*recalls_out)[recalls_out_index] = !recalls->empty() ? 
recalls->back() : 0; + + for (int64_t i = static_cast(precisions->size()) - 1; i > 0; --i) { + if ((*precisions)[i] > (*precisions)[i - 1]) { + (*precisions)[i - 1] = (*precisions)[i]; + } + } + + // Sample the per instance precision/recall list at each recall threshold + for (size_t r = 0; r < recall_thresholds.size(); ++r) { + // first index in recalls >= recall_thresholds[r] + std::vector::iterator low = std::lower_bound( + recalls->begin(), recalls->end(), recall_thresholds[r]); + size_t precisions_index = low - recalls->begin(); + + const auto results_ind = precisions_out_index + r * precisions_out_stride; + assert(results_ind < precisions_out->size()); + assert(results_ind < scores_out->size()); + if (precisions_index < precisions->size()) { + (*precisions_out)[results_ind] = (*precisions)[precisions_index]; + (*scores_out)[results_ind] = + detection_scores[detection_sorted_indices[precisions_index]]; + } else { + (*precisions_out)[results_ind] = 0; + (*scores_out)[results_ind] = 0; + } + } +} +py::dict Accumulate( + const py::object& params, + const std::vector& evaluations) { + const std::vector recall_thresholds = + list_to_vec(params.attr("recThrs")); + const std::vector max_detections = + list_to_vec(params.attr("maxDets")); + const int num_iou_thresholds = py::len(params.attr("iouThrs")); + const int num_recall_thresholds = py::len(params.attr("recThrs")); + const int num_categories = params.attr("useCats").cast() == 1 + ? py::len(params.attr("catIds")) + : 1; + const int num_area_ranges = py::len(params.attr("areaRng")); + const int num_max_detections = py::len(params.attr("maxDets")); + const int num_images = py::len(params.attr("imgIds")); + + std::vector precisions_out( + num_iou_thresholds * num_recall_thresholds * num_categories * + num_area_ranges * num_max_detections, + -1); + std::vector recalls_out( + num_iou_thresholds * num_categories * num_area_ranges * + num_max_detections, + -1); + std::vector scores_out( + num_iou_thresholds * num_recall_thresholds * num_categories * + num_area_ranges * num_max_detections, + -1); + + // Consider the list of all detected instances in the entire dataset in one + // large list. evaluation_indices, detection_scores, + // image_detection_indices, and detection_sorted_indices all have the same + // length as this list, such that each entry corresponds to one detected + // instance + std::vector evaluation_indices; // indices into evaluations[] + std::vector detection_scores; // detection scores of each instance + std::vector detection_sorted_indices; // sorted indices of all + // instances in the dataset + std::vector + image_detection_indices; // indices into the list of detected instances in + // the same image as each instance + std::vector precisions, recalls; + + for (auto c = 0; c < num_categories; ++c) { + for (auto a = 0; a < num_area_ranges; ++a) { + for (auto m = 0; m < num_max_detections; ++m) { + // The COCO PythonAPI assumes evaluations[] (the return value of + // COCOeval::EvaluateImages() is one long list storing results for each + // combination of category, area range, and image id, with categories in + // the outermost loop and images in the innermost loop. 
+ const int64_t evaluations_index = + c * num_area_ranges * num_images + a * num_images; + int num_valid_ground_truth = BuildSortedDetectionList( + evaluations, + evaluations_index, + num_images, + max_detections[m], + &evaluation_indices, + &detection_scores, + &detection_sorted_indices, + &image_detection_indices); + + if (num_valid_ground_truth == 0) { + continue; + } + + for (auto t = 0; t < num_iou_thresholds; ++t) { + // recalls_out is a flattened vectors representing a + // num_iou_thresholds X num_categories X num_area_ranges X + // num_max_detections matrix + const int64_t recalls_out_index = + t * num_categories * num_area_ranges * num_max_detections + + c * num_area_ranges * num_max_detections + + a * num_max_detections + m; + + // precisions_out and scores_out are flattened vectors + // representing a num_iou_thresholds X num_recall_thresholds X + // num_categories X num_area_ranges X num_max_detections matrix + const int64_t precisions_out_stride = + num_categories * num_area_ranges * num_max_detections; + const int64_t precisions_out_index = t * num_recall_thresholds * + num_categories * num_area_ranges * num_max_detections + + c * num_area_ranges * num_max_detections + + a * num_max_detections + m; + + ComputePrecisionRecallCurve( + precisions_out_index, + precisions_out_stride, + recalls_out_index, + recall_thresholds, + t, + num_iou_thresholds, + num_valid_ground_truth, + evaluations, + evaluation_indices, + detection_scores, + detection_sorted_indices, + image_detection_indices, + &precisions, + &recalls, + &precisions_out, + &scores_out, + &recalls_out); + } + } + } + } + + time_t rawtime; + struct tm local_time; + std::array buffer; + time(&rawtime); +#ifdef _WIN32 + localtime_s(&local_time, &rawtime); +#else + localtime_r(&rawtime, &local_time); +#endif + strftime( + buffer.data(), 200, "%Y-%m-%d %H:%num_max_detections:%S", &local_time); + return py::dict( + "params"_a = params, + "counts"_a = std::vector({num_iou_thresholds, + num_recall_thresholds, + num_categories, + num_area_ranges, + num_max_detections}), + "date"_a = buffer, + "precision"_a = precisions_out, + "recall"_a = recalls_out, + "scores"_a = scores_out); +} + +} // namespace COCOeval diff --git a/yolox/layers/csrc/cocoeval/cocoeval.h b/yolox/layers/csrc/cocoeval/cocoeval.h new file mode 100644 index 0000000000000000000000000000000000000000..f9def4151102d1c493dc88186384342565798d05 --- /dev/null +++ b/yolox/layers/csrc/cocoeval/cocoeval.h @@ -0,0 +1,85 @@ +// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +#pragma once + +#include +#include +#include +#include +#include + +namespace py = pybind11; + +namespace COCOeval { + +// Annotation data for a single object instance in an image +struct InstanceAnnotation { + InstanceAnnotation( + uint64_t id, + double score, + double area, + bool is_crowd, + bool ignore) + : id{id}, score{score}, area{area}, is_crowd{is_crowd}, ignore{ignore} {} + uint64_t id; + double score = 0.; + double area = 0.; + bool is_crowd = false; + bool ignore = false; +}; + +// Stores intermediate results for evaluating detection results for a single +// image that has D detected instances and G ground truth instances. 
This stores +// matches between detected and ground truth instances +struct ImageEvaluation { + // For each of the D detected instances, the id of the matched ground truth + // instance, or 0 if unmatched + std::vector detection_matches; + + // The detection score of each of the D detected instances + std::vector detection_scores; + + // Marks whether or not each of G instances was ignored from evaluation (e.g., + // because it's outside area_range) + std::vector ground_truth_ignores; + + // Marks whether or not each of D instances was ignored from evaluation (e.g., + // because it's outside aRng) + std::vector detection_ignores; +}; + +template +using ImageCategoryInstances = std::vector>>; + +// C++ implementation of COCO API cocoeval.py::COCOeval.evaluateImg(). For each +// combination of image, category, area range settings, and IOU thresholds to +// evaluate, it matches detected instances to ground truth instances and stores +// the results into a vector of ImageEvaluation results, which will be +// interpreted by the COCOeval::Accumulate() function to produce precion-recall +// curves. The parameters of nested vectors have the following semantics: +// image_category_ious[i][c][d][g] is the intersection over union of the d'th +// detected instance and g'th ground truth instance of +// category category_ids[c] in image image_ids[i] +// image_category_ground_truth_instances[i][c] is a vector of ground truth +// instances in image image_ids[i] of category category_ids[c] +// image_category_detection_instances[i][c] is a vector of detected +// instances in image image_ids[i] of category category_ids[c] +std::vector EvaluateImages( + const std::vector>& area_ranges, // vector of 2-tuples + int max_detections, + const std::vector& iou_thresholds, + const ImageCategoryInstances>& image_category_ious, + const ImageCategoryInstances& + image_category_ground_truth_instances, + const ImageCategoryInstances& + image_category_detection_instances); + +// C++ implementation of COCOeval.accumulate(), which generates precision +// recall curves for each set of category, IOU threshold, detection area range, +// and max number of detections parameters. 
It is assumed that the parameter +// evaluations is the return value of the functon COCOeval::EvaluateImages(), +// which was called with the same parameter settings params +py::dict Accumulate( + const py::object& params, + const std::vector& evalutations); + +} // namespace COCOeval diff --git a/yolox/layers/csrc/vision.cpp b/yolox/layers/csrc/vision.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7663d0faf5c58542624d2f01730618b9aa9d4a25 --- /dev/null +++ b/yolox/layers/csrc/vision.cpp @@ -0,0 +1,13 @@ +#include "cocoeval/cocoeval.h" + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("COCOevalAccumulate", &COCOeval::Accumulate, "COCOeval::Accumulate"); + m.def( + "COCOevalEvaluateImages", + &COCOeval::EvaluateImages, + "COCOeval::EvaluateImages"); + pybind11::class_(m, "InstanceAnnotation") + .def(pybind11::init()); + pybind11::class_(m, "ImageEvaluation") + .def(pybind11::init<>()); +} diff --git a/yolox/layers/fast_coco_eval_api.py b/yolox/layers/fast_coco_eval_api.py new file mode 100644 index 0000000000000000000000000000000000000000..442c97eed233eb6a1ccf05dd6ea6b94e35ca4c9d --- /dev/null +++ b/yolox/layers/fast_coco_eval_api.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# This file comes from +# https://github.com/facebookresearch/detectron2/blob/master/detectron2/evaluation/fast_eval_api.py +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + +import numpy as np +from pycocotools.cocoeval import COCOeval + +# import torch first to make yolox._C work without ImportError of libc10.so +# in YOLOX, env is already set in __init__.py. +from yolox import _C + +import copy +import time + + +class COCOeval_opt(COCOeval): + """ + This is a slightly modified version of the original COCO API, where the functions evaluateImg() + and accumulate() are implemented in C++ to speedup evaluation + """ + + def evaluate(self): + """ + Run per image evaluation on given images and store results in self.evalImgs_cpp, a + datastructure that isn't readable from Python but is used by a c++ implementation of + accumulate(). Unlike the original COCO PythonAPI, we don't populate the datastructure + self.evalImgs because this datastructure is a computational bottleneck. + :return: None + """ + tic = time.time() + + print("Running per image evaluation...") + p = self.params + # add backward compatibility if useSegm is specified in params + if p.useSegm is not None: + p.iouType = "segm" if p.useSegm == 1 else "bbox" + print( + "useSegm (deprecated) is not None. 
Running {} evaluation".format( + p.iouType + ) + ) + print("Evaluate annotation type *{}*".format(p.iouType)) + p.imgIds = list(np.unique(p.imgIds)) + if p.useCats: + p.catIds = list(np.unique(p.catIds)) + p.maxDets = sorted(p.maxDets) + self.params = p + + self._prepare() + + # loop through images, area range, max detection number + catIds = p.catIds if p.useCats else [-1] + + if p.iouType == "segm" or p.iouType == "bbox": + computeIoU = self.computeIoU + elif p.iouType == "keypoints": + computeIoU = self.computeOks + self.ious = { + (imgId, catId): computeIoU(imgId, catId) + for imgId in p.imgIds + for catId in catIds + } + + maxDet = p.maxDets[-1] + + # <<<< Beginning of code differences with original COCO API + def convert_instances_to_cpp(instances, is_det=False): + # Convert annotations for a list of instances in an image to a format that's fast + # to access in C++ + instances_cpp = [] + for instance in instances: + instance_cpp = _C.InstanceAnnotation( + int(instance["id"]), + instance["score"] if is_det else instance.get("score", 0.0), + instance["area"], + bool(instance.get("iscrowd", 0)), + bool(instance.get("ignore", 0)), + ) + instances_cpp.append(instance_cpp) + return instances_cpp + + # Convert GT annotations, detections, and IOUs to a format that's fast to access in C++ + ground_truth_instances = [ + [convert_instances_to_cpp(self._gts[imgId, catId]) for catId in p.catIds] + for imgId in p.imgIds + ] + detected_instances = [ + [ + convert_instances_to_cpp(self._dts[imgId, catId], is_det=True) + for catId in p.catIds + ] + for imgId in p.imgIds + ] + ious = [[self.ious[imgId, catId] for catId in catIds] for imgId in p.imgIds] + + if not p.useCats: + # For each image, flatten per-category lists into a single list + ground_truth_instances = [ + [[o for c in i for o in c]] for i in ground_truth_instances + ] + detected_instances = [ + [[o for c in i for o in c]] for i in detected_instances + ] + + # Call C++ implementation of self.evaluateImgs() + self._evalImgs_cpp = _C.COCOevalEvaluateImages( + p.areaRng, + maxDet, + p.iouThrs, + ious, + ground_truth_instances, + detected_instances, + ) + self._evalImgs = None + + self._paramsEval = copy.deepcopy(self.params) + toc = time.time() + print("COCOeval_opt.evaluate() finished in {:0.2f} seconds.".format(toc - tic)) + # >>>> End of code differences with original COCO API + + def accumulate(self): + """ + Accumulate per image evaluation results and store the result in self.eval. 
Does not + support changing parameter settings from those used by self.evaluate() + """ + print("Accumulating evaluation results...") + tic = time.time() + if not hasattr(self, "_evalImgs_cpp"): + print("Please run evaluate() first") + + self.eval = _C.COCOevalAccumulate(self._paramsEval, self._evalImgs_cpp) + + # recall is num_iou_thresholds X num_categories X num_area_ranges X num_max_detections + self.eval["recall"] = np.array(self.eval["recall"]).reshape( + self.eval["counts"][:1] + self.eval["counts"][2:] + ) + + # precision and scores are num_iou_thresholds X num_recall_thresholds X num_categories X + # num_area_ranges X num_max_detections + self.eval["precision"] = np.array(self.eval["precision"]).reshape( + self.eval["counts"] + ) + self.eval["scores"] = np.array(self.eval["scores"]).reshape(self.eval["counts"]) + toc = time.time() + print( + "COCOeval_opt.accumulate() finished in {:0.2f} seconds.".format(toc - tic) + ) diff --git a/yolox/models/__init__.py b/yolox/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c4641a61bf466259c88e0a0b92e4ff55b2abcd61 --- /dev/null +++ b/yolox/models/__init__.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + +from .darknet import CSPDarknet, Darknet +from .losses import IOUloss +from .yolo_fpn import YOLOFPN +from .yolo_head import YOLOXHead +from .yolo_pafpn import YOLOPAFPN +from .yolox import YOLOX diff --git a/yolox/models/darknet.py b/yolox/models/darknet.py new file mode 100644 index 0000000000000000000000000000000000000000..70c79f86a0f444d5325329b5e8c9b50c864d48f0 --- /dev/null +++ b/yolox/models/darknet.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + +from torch import nn + +from .network_blocks import BaseConv, CSPLayer, DWConv, Focus, ResLayer, SPPBottleneck + + +class Darknet(nn.Module): + # number of blocks from dark2 to dark5. + depth2blocks = {21: [1, 2, 2, 1], 53: [2, 8, 8, 4]} + + def __init__( + self, + depth, + in_channels=3, + stem_out_channels=32, + out_features=("dark3", "dark4", "dark5"), + ): + """ + Args: + depth (int): depth of darknet used in model, usually use [21, 53] for this param. + in_channels (int): number of input channels, for example, use 3 for RGB image. + stem_out_channels (int): number of output chanels of darknet stem. + It decides channels of darknet layer2 to layer5. + out_features (Tuple[str]): desired output layer name. + """ + super().__init__() + assert out_features, "please provide output features of Darknet" + self.out_features = out_features + self.stem = nn.Sequential( + BaseConv(in_channels, stem_out_channels, ksize=3, stride=1, act="lrelu"), + *self.make_group_layer(stem_out_channels, num_blocks=1, stride=2), + ) + in_channels = stem_out_channels * 2 # 64 + + num_blocks = Darknet.depth2blocks[depth] + # create darknet with `stem_out_channels` and `num_blocks` layers. + # to make model structure more clear, we don't use `for` statement in python. 
+ self.dark2 = nn.Sequential( + *self.make_group_layer(in_channels, num_blocks[0], stride=2) + ) + in_channels *= 2 # 128 + self.dark3 = nn.Sequential( + *self.make_group_layer(in_channels, num_blocks[1], stride=2) + ) + in_channels *= 2 # 256 + self.dark4 = nn.Sequential( + *self.make_group_layer(in_channels, num_blocks[2], stride=2) + ) + in_channels *= 2 # 512 + + self.dark5 = nn.Sequential( + *self.make_group_layer(in_channels, num_blocks[3], stride=2), + *self.make_spp_block([in_channels, in_channels * 2], in_channels * 2), + ) + + def make_group_layer(self, in_channels: int, num_blocks: int, stride: int = 1): + "starts with conv layer then has `num_blocks` `ResLayer`" + return [ + BaseConv(in_channels, in_channels * 2, ksize=3, stride=stride, act="lrelu"), + *[(ResLayer(in_channels * 2)) for _ in range(num_blocks)], + ] + + def make_spp_block(self, filters_list, in_filters): + m = nn.Sequential( + *[ + BaseConv(in_filters, filters_list[0], 1, stride=1, act="lrelu"), + BaseConv(filters_list[0], filters_list[1], 3, stride=1, act="lrelu"), + SPPBottleneck( + in_channels=filters_list[1], + out_channels=filters_list[0], + activation="lrelu", + ), + BaseConv(filters_list[0], filters_list[1], 3, stride=1, act="lrelu"), + BaseConv(filters_list[1], filters_list[0], 1, stride=1, act="lrelu"), + ] + ) + return m + + def forward(self, x): + outputs = {} + x = self.stem(x) + outputs["stem"] = x + x = self.dark2(x) + outputs["dark2"] = x + x = self.dark3(x) + outputs["dark3"] = x + x = self.dark4(x) + outputs["dark4"] = x + x = self.dark5(x) + outputs["dark5"] = x + return {k: v for k, v in outputs.items() if k in self.out_features} + + +class CSPDarknet(nn.Module): + def __init__( + self, + dep_mul, + wid_mul, + out_features=("dark3", "dark4", "dark5"), + depthwise=False, + act="silu", + ): + super().__init__() + assert out_features, "please provide output features of Darknet" + self.out_features = out_features + Conv = DWConv if depthwise else BaseConv + + base_channels = int(wid_mul * 64) # 64 + base_depth = max(round(dep_mul * 3), 1) # 3 + + # stem + self.stem = Focus(3, base_channels, ksize=3, act=act) + + # dark2 + self.dark2 = nn.Sequential( + Conv(base_channels, base_channels * 2, 3, 2, act=act), + CSPLayer( + base_channels * 2, + base_channels * 2, + n=base_depth, + depthwise=depthwise, + act=act, + ), + ) + + # dark3 + self.dark3 = nn.Sequential( + Conv(base_channels * 2, base_channels * 4, 3, 2, act=act), + CSPLayer( + base_channels * 4, + base_channels * 4, + n=base_depth * 3, + depthwise=depthwise, + act=act, + ), + ) + + # dark4 + self.dark4 = nn.Sequential( + Conv(base_channels * 4, base_channels * 8, 3, 2, act=act), + CSPLayer( + base_channels * 8, + base_channels * 8, + n=base_depth * 3, + depthwise=depthwise, + act=act, + ), + ) + + # dark5 + self.dark5 = nn.Sequential( + Conv(base_channels * 8, base_channels * 16, 3, 2, act=act), + SPPBottleneck(base_channels * 16, base_channels * 16, activation=act), + CSPLayer( + base_channels * 16, + base_channels * 16, + n=base_depth, + shortcut=False, + depthwise=depthwise, + act=act, + ), + ) + + def forward(self, x): + outputs = {} + x = self.stem(x) + outputs["stem"] = x + x = self.dark2(x) + outputs["dark2"] = x + x = self.dark3(x) + outputs["dark3"] = x + x = self.dark4(x) + outputs["dark4"] = x + x = self.dark5(x) + outputs["dark5"] = x + return {k: v for k, v in outputs.items() if k in self.out_features} diff --git a/yolox/models/losses.py b/yolox/models/losses.py new file mode 100644 index 
0000000000000000000000000000000000000000..a789ebab8ba28a3927d467947c7d918fe4f2478b --- /dev/null +++ b/yolox/models/losses.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class IOUloss(nn.Module): + def __init__(self, reduction="none", loss_type="iou"): + super(IOUloss, self).__init__() + self.reduction = reduction + self.loss_type = loss_type + + def forward(self, pred, target): + assert pred.shape[0] == target.shape[0] + + pred = pred.view(-1, 4) + target = target.view(-1, 4) + tl = torch.max( + (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2) + ) + br = torch.min( + (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2) + ) + + area_p = torch.prod(pred[:, 2:], 1) + area_g = torch.prod(target[:, 2:], 1) + + en = (tl < br).type(tl.type()).prod(dim=1) + area_i = torch.prod(br - tl, 1) * en + iou = (area_i) / (area_p + area_g - area_i + 1e-16) + + if self.loss_type == "iou": + loss = 1 - iou ** 2 + elif self.loss_type == "giou": + c_tl = torch.min( + (pred[:, :2] - pred[:, 2:] / 2), (target[:, :2] - target[:, 2:] / 2) + ) + c_br = torch.max( + (pred[:, :2] + pred[:, 2:] / 2), (target[:, :2] + target[:, 2:] / 2) + ) + area_c = torch.prod(c_br - c_tl, 1) + giou = iou - (area_c - area_i) / area_c.clamp(1e-16) + loss = 1 - giou.clamp(min=-1.0, max=1.0) + + if self.reduction == "mean": + loss = loss.mean() + elif self.reduction == "sum": + loss = loss.sum() + + return loss + + +def sigmoid_focal_loss(inputs, targets, num_boxes, alpha: float = 0.25, gamma: float = 2): + """ + Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002. + Args: + inputs: A float tensor of arbitrary shape. + The predictions for each example. + targets: A float tensor with the same shape as inputs. Stores the binary + classification label for each element in inputs + (0 for the negative class and 1 for the positive class). + alpha: (optional) Weighting factor in range (0,1) to balance + positive vs negative examples. Default = -1 (no weighting). + gamma: Exponent of the modulating factor (1 - p_t) to + balance easy vs hard examples. + Returns: + Loss tensor + """ + prob = inputs.sigmoid() + ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none") + p_t = prob * targets + (1 - prob) * (1 - targets) + loss = ce_loss * ((1 - p_t) ** gamma) + + if alpha >= 0: + alpha_t = alpha * targets + (1 - alpha) * (1 - targets) + loss = alpha_t * loss + #return loss.mean(0).sum() / num_boxes + return loss.sum() / num_boxes \ No newline at end of file diff --git a/yolox/models/network_blocks.py b/yolox/models/network_blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..4bdb2ca731a07aa9e5e6b68c652467f28fe96079 --- /dev/null +++ b/yolox/models/network_blocks.py @@ -0,0 +1,210 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
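A tiny, self-contained usage sketch for IOUloss above (box values are illustrative): inputs are interpreted as (cx, cy, w, h), since the corners are reconstructed as center plus/minus half the size.

import torch
from yolox.models.losses import IOUloss

pred   = torch.tensor([[50., 50., 20., 20.], [50., 50., 20., 20.]])   # (cx, cy, w, h)
target = torch.tensor([[50., 50., 20., 20.], [60., 50., 20., 20.]])
giou_loss = IOUloss(reduction="none", loss_type="giou")
print(giou_loss(pred, target))   # ~0 for the exact match, ~1.33 for the shifted pair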
+ +import torch +import torch.nn as nn + + +class SiLU(nn.Module): + """export-friendly version of nn.SiLU()""" + + @staticmethod + def forward(x): + return x * torch.sigmoid(x) + + +def get_activation(name="silu", inplace=True): + if name == "silu": + module = nn.SiLU(inplace=inplace) + elif name == "relu": + module = nn.ReLU(inplace=inplace) + elif name == "lrelu": + module = nn.LeakyReLU(0.1, inplace=inplace) + else: + raise AttributeError("Unsupported act type: {}".format(name)) + return module + + +class BaseConv(nn.Module): + """A Conv2d -> Batchnorm -> silu/leaky relu block""" + + def __init__( + self, in_channels, out_channels, ksize, stride, groups=1, bias=False, act="silu" + ): + super().__init__() + # same padding + pad = (ksize - 1) // 2 + self.conv = nn.Conv2d( + in_channels, + out_channels, + kernel_size=ksize, + stride=stride, + padding=pad, + groups=groups, + bias=bias, + ) + self.bn = nn.BatchNorm2d(out_channels) + self.act = get_activation(act, inplace=True) + + def forward(self, x): + return self.act(self.bn(self.conv(x))) + + def fuseforward(self, x): + return self.act(self.conv(x)) + + +class DWConv(nn.Module): + """Depthwise Conv + Conv""" + + def __init__(self, in_channels, out_channels, ksize, stride=1, act="silu"): + super().__init__() + self.dconv = BaseConv( + in_channels, + in_channels, + ksize=ksize, + stride=stride, + groups=in_channels, + act=act, + ) + self.pconv = BaseConv( + in_channels, out_channels, ksize=1, stride=1, groups=1, act=act + ) + + def forward(self, x): + x = self.dconv(x) + return self.pconv(x) + + +class Bottleneck(nn.Module): + # Standard bottleneck + def __init__( + self, + in_channels, + out_channels, + shortcut=True, + expansion=0.5, + depthwise=False, + act="silu", + ): + super().__init__() + hidden_channels = int(out_channels * expansion) + Conv = DWConv if depthwise else BaseConv + self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act) + self.conv2 = Conv(hidden_channels, out_channels, 3, stride=1, act=act) + self.use_add = shortcut and in_channels == out_channels + + def forward(self, x): + y = self.conv2(self.conv1(x)) + if self.use_add: + y = y + x + return y + + +class ResLayer(nn.Module): + "Residual layer with `in_channels` inputs." 
+ + def __init__(self, in_channels: int): + super().__init__() + mid_channels = in_channels // 2 + self.layer1 = BaseConv( + in_channels, mid_channels, ksize=1, stride=1, act="lrelu" + ) + self.layer2 = BaseConv( + mid_channels, in_channels, ksize=3, stride=1, act="lrelu" + ) + + def forward(self, x): + out = self.layer2(self.layer1(x)) + return x + out + + +class SPPBottleneck(nn.Module): + """Spatial pyramid pooling layer used in YOLOv3-SPP""" + + def __init__( + self, in_channels, out_channels, kernel_sizes=(5, 9, 13), activation="silu" + ): + super().__init__() + hidden_channels = in_channels // 2 + self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=activation) + self.m = nn.ModuleList( + [ + nn.MaxPool2d(kernel_size=ks, stride=1, padding=ks // 2) + for ks in kernel_sizes + ] + ) + conv2_channels = hidden_channels * (len(kernel_sizes) + 1) + self.conv2 = BaseConv(conv2_channels, out_channels, 1, stride=1, act=activation) + + def forward(self, x): + x = self.conv1(x) + x = torch.cat([x] + [m(x) for m in self.m], dim=1) + x = self.conv2(x) + return x + + +class CSPLayer(nn.Module): + """C3 in yolov5, CSP Bottleneck with 3 convolutions""" + + def __init__( + self, + in_channels, + out_channels, + n=1, + shortcut=True, + expansion=0.5, + depthwise=False, + act="silu", + ): + """ + Args: + in_channels (int): input channels. + out_channels (int): output channels. + n (int): number of Bottlenecks. Default value: 1. + """ + # ch_in, ch_out, number, shortcut, groups, expansion + super().__init__() + hidden_channels = int(out_channels * expansion) # hidden channels + self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act) + self.conv2 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act) + self.conv3 = BaseConv(2 * hidden_channels, out_channels, 1, stride=1, act=act) + module_list = [ + Bottleneck( + hidden_channels, hidden_channels, shortcut, 1.0, depthwise, act=act + ) + for _ in range(n) + ] + self.m = nn.Sequential(*module_list) + + def forward(self, x): + x_1 = self.conv1(x) + x_2 = self.conv2(x) + x_1 = self.m(x_1) + x = torch.cat((x_1, x_2), dim=1) + return self.conv3(x) + + +class Focus(nn.Module): + """Focus width and height information into channel space.""" + + def __init__(self, in_channels, out_channels, ksize=1, stride=1, act="silu"): + super().__init__() + self.conv = BaseConv(in_channels * 4, out_channels, ksize, stride, act=act) + + def forward(self, x): + # shape of x (b,c,w,h) -> y(b,4c,w/2,h/2) + patch_top_left = x[..., ::2, ::2] + patch_top_right = x[..., ::2, 1::2] + patch_bot_left = x[..., 1::2, ::2] + patch_bot_right = x[..., 1::2, 1::2] + x = torch.cat( + ( + patch_top_left, + patch_bot_left, + patch_top_right, + patch_bot_right, + ), + dim=1, + ) + return self.conv(x) diff --git a/yolox/models/yolo_fpn.py b/yolox/models/yolo_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..8b3ba1473c005a57187247fd276ee5920750add8 --- /dev/null +++ b/yolox/models/yolo_fpn.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + +import torch +import torch.nn as nn + +from .darknet import Darknet +from .network_blocks import BaseConv + + +class YOLOFPN(nn.Module): + """ + YOLOFPN module. Darknet 53 is the default backbone of this model. 
+ """ + + def __init__( + self, + depth=53, + in_features=["dark3", "dark4", "dark5"], + ): + super().__init__() + + self.backbone = Darknet(depth) + self.in_features = in_features + + # out 1 + self.out1_cbl = self._make_cbl(512, 256, 1) + self.out1 = self._make_embedding([256, 512], 512 + 256) + + # out 2 + self.out2_cbl = self._make_cbl(256, 128, 1) + self.out2 = self._make_embedding([128, 256], 256 + 128) + + # upsample + self.upsample = nn.Upsample(scale_factor=2, mode="nearest") + + def _make_cbl(self, _in, _out, ks): + return BaseConv(_in, _out, ks, stride=1, act="lrelu") + + def _make_embedding(self, filters_list, in_filters): + m = nn.Sequential( + *[ + self._make_cbl(in_filters, filters_list[0], 1), + self._make_cbl(filters_list[0], filters_list[1], 3), + self._make_cbl(filters_list[1], filters_list[0], 1), + self._make_cbl(filters_list[0], filters_list[1], 3), + self._make_cbl(filters_list[1], filters_list[0], 1), + ] + ) + return m + + def load_pretrained_model(self, filename="./weights/darknet53.mix.pth"): + with open(filename, "rb") as f: + state_dict = torch.load(f, map_location="cpu") + print("loading pretrained weights...") + self.backbone.load_state_dict(state_dict) + + def forward(self, inputs): + """ + Args: + inputs (Tensor): input image. + + Returns: + Tuple[Tensor]: FPN output features.. + """ + # backbone + out_features = self.backbone(inputs) + x2, x1, x0 = [out_features[f] for f in self.in_features] + + # yolo branch 1 + x1_in = self.out1_cbl(x0) + x1_in = self.upsample(x1_in) + x1_in = torch.cat([x1_in, x1], 1) + out_dark4 = self.out1(x1_in) + + # yolo branch 2 + x2_in = self.out2_cbl(out_dark4) + x2_in = self.upsample(x2_in) + x2_in = torch.cat([x2_in, x2], 1) + out_dark3 = self.out2(x2_in) + + outputs = (out_dark3, out_dark4, x0) + return outputs diff --git a/yolox/models/yolo_head.py b/yolox/models/yolo_head.py new file mode 100644 index 0000000000000000000000000000000000000000..ba8238f17d317eeb4f6c4dc7470f3a6db3ce3ece --- /dev/null +++ b/yolox/models/yolo_head.py @@ -0,0 +1,660 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + +from loguru import logger + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from yolox.utils import bboxes_iou + +import math + +from .losses import IOUloss +from .network_blocks import BaseConv, DWConv + + +class YOLOXHead(nn.Module): + def __init__( + self, + num_classes, + width=1.0, + strides=[8, 16, 32], + in_channels=[256, 512, 1024], + act="silu", + depthwise=False, + ): + """ + Args: + act (str): activation type of conv. Defalut value: "silu". + depthwise (bool): wheather apply depthwise conv in conv branch. Defalut value: False. 
+ """ + super().__init__() + + self.n_anchors = 1 + self.num_classes = num_classes + self.decode_in_inference = True # for deploy, set to False + + self.cls_convs = nn.ModuleList() + self.reg_convs = nn.ModuleList() + self.cls_preds = nn.ModuleList() + self.reg_preds = nn.ModuleList() + self.obj_preds = nn.ModuleList() + self.stems = nn.ModuleList() + Conv = DWConv if depthwise else BaseConv + + for i in range(len(in_channels)): + self.stems.append( + BaseConv( + in_channels=int(in_channels[i] * width), + out_channels=int(256 * width), + ksize=1, + stride=1, + act=act, + ) + ) + self.cls_convs.append( + nn.Sequential( + *[ + Conv( + in_channels=int(256 * width), + out_channels=int(256 * width), + ksize=3, + stride=1, + act=act, + ), + Conv( + in_channels=int(256 * width), + out_channels=int(256 * width), + ksize=3, + stride=1, + act=act, + ), + ] + ) + ) + self.reg_convs.append( + nn.Sequential( + *[ + Conv( + in_channels=int(256 * width), + out_channels=int(256 * width), + ksize=3, + stride=1, + act=act, + ), + Conv( + in_channels=int(256 * width), + out_channels=int(256 * width), + ksize=3, + stride=1, + act=act, + ), + ] + ) + ) + self.cls_preds.append( + nn.Conv2d( + in_channels=int(256 * width), + out_channels=self.n_anchors * self.num_classes, + kernel_size=1, + stride=1, + padding=0, + ) + ) + self.reg_preds.append( + nn.Conv2d( + in_channels=int(256 * width), + out_channels=4, + kernel_size=1, + stride=1, + padding=0, + ) + ) + self.obj_preds.append( + nn.Conv2d( + in_channels=int(256 * width), + out_channels=self.n_anchors * 1, + kernel_size=1, + stride=1, + padding=0, + ) + ) + + self.use_l1 = False + self.l1_loss = nn.L1Loss(reduction="none") + self.bcewithlog_loss = nn.BCEWithLogitsLoss(reduction="none") + self.iou_loss = IOUloss(reduction="none") + self.strides = strides + self.grids = [torch.zeros(1)] * len(in_channels) + self.expanded_strides = [None] * len(in_channels) + + def initialize_biases(self, prior_prob): + for conv in self.cls_preds: + b = conv.bias.view(self.n_anchors, -1) + b.data.fill_(-math.log((1 - prior_prob) / prior_prob)) + conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) + + for conv in self.obj_preds: + b = conv.bias.view(self.n_anchors, -1) + b.data.fill_(-math.log((1 - prior_prob) / prior_prob)) + conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) + + def forward(self, xin, labels=None, imgs=None): + outputs = [] + origin_preds = [] + x_shifts = [] + y_shifts = [] + expanded_strides = [] + + for k, (cls_conv, reg_conv, stride_this_level, x) in enumerate( + zip(self.cls_convs, self.reg_convs, self.strides, xin) + ): + x = self.stems[k](x) + cls_x = x + reg_x = x + + cls_feat = cls_conv(cls_x) + cls_output = self.cls_preds[k](cls_feat) + + reg_feat = reg_conv(reg_x) + reg_output = self.reg_preds[k](reg_feat) + obj_output = self.obj_preds[k](reg_feat) + + if self.training: + output = torch.cat([reg_output, obj_output, cls_output], 1) + output, grid = self.get_output_and_grid( + output, k, stride_this_level, xin[0].type() + ) + x_shifts.append(grid[:, :, 0]) + y_shifts.append(grid[:, :, 1]) + expanded_strides.append( + torch.zeros(1, grid.shape[1]) + .fill_(stride_this_level) + .type_as(xin[0]) + ) + if self.use_l1: + batch_size = reg_output.shape[0] + hsize, wsize = reg_output.shape[-2:] + reg_output = reg_output.view( + batch_size, self.n_anchors, 4, hsize, wsize + ) + reg_output = reg_output.permute(0, 1, 3, 4, 2).reshape( + batch_size, -1, 4 + ) + origin_preds.append(reg_output.clone()) + + else: + output = torch.cat( + 
[reg_output, obj_output.sigmoid(), cls_output.sigmoid()], 1 + ) + + outputs.append(output) + + if self.training: + return self.get_losses( + imgs, + x_shifts, + y_shifts, + expanded_strides, + labels, + torch.cat(outputs, 1), + origin_preds, + dtype=xin[0].dtype, + ) + else: + self.hw = [x.shape[-2:] for x in outputs] + # [batch, n_anchors_all, 85] + outputs = torch.cat( + [x.flatten(start_dim=2) for x in outputs], dim=2 + ).permute(0, 2, 1) + if self.decode_in_inference: + return self.decode_outputs(outputs, dtype=xin[0].type()) + else: + return outputs + + def get_output_and_grid(self, output, k, stride, dtype): + grid = self.grids[k] + + batch_size = output.shape[0] + n_ch = 5 + self.num_classes + hsize, wsize = output.shape[-2:] + if grid.shape[2:4] != output.shape[2:4]: + yv, xv = torch.meshgrid([torch.arange(hsize), torch.arange(wsize)]) + grid = torch.stack((xv, yv), 2).view(1, 1, hsize, wsize, 2).type(dtype) + self.grids[k] = grid + + output = output.view(batch_size, self.n_anchors, n_ch, hsize, wsize) + output = output.permute(0, 1, 3, 4, 2).reshape( + batch_size, self.n_anchors * hsize * wsize, -1 + ) + grid = grid.view(1, -1, 2) + output[..., :2] = (output[..., :2] + grid) * stride + output[..., 2:4] = torch.exp(output[..., 2:4]) * stride + return output, grid + + def decode_outputs(self, outputs, dtype): + grids = [] + strides = [] + for (hsize, wsize), stride in zip(self.hw, self.strides): + yv, xv = torch.meshgrid([torch.arange(hsize), torch.arange(wsize)]) + grid = torch.stack((xv, yv), 2).view(1, -1, 2) + grids.append(grid) + shape = grid.shape[:2] + strides.append(torch.full((*shape, 1), stride)) + + grids = torch.cat(grids, dim=1).type(dtype) + strides = torch.cat(strides, dim=1).type(dtype) + + outputs[..., :2] = (outputs[..., :2] + grids) * strides + outputs[..., 2:4] = torch.exp(outputs[..., 2:4]) * strides + return outputs + + def get_losses( + self, + imgs, + x_shifts, + y_shifts, + expanded_strides, + labels, + outputs, + origin_preds, + dtype, + ): + bbox_preds = outputs[:, :, :4] # [batch, n_anchors_all, 4] + obj_preds = outputs[:, :, 4].unsqueeze(-1) # [batch, n_anchors_all, 1] + cls_preds = outputs[:, :, 5:] # [batch, n_anchors_all, n_cls] + + # calculate targets + mixup = labels.shape[2] > 5 + if mixup: + label_cut = labels[..., :5] + else: + label_cut = labels + nlabel = (label_cut.sum(dim=2) > 0).sum(dim=1) # number of objects + + total_num_anchors = outputs.shape[1] + x_shifts = torch.cat(x_shifts, 1) # [1, n_anchors_all] + y_shifts = torch.cat(y_shifts, 1) # [1, n_anchors_all] + expanded_strides = torch.cat(expanded_strides, 1) + if self.use_l1: + origin_preds = torch.cat(origin_preds, 1) + + cls_targets = [] + reg_targets = [] + l1_targets = [] + obj_targets = [] + fg_masks = [] + + num_fg = 0.0 + num_gts = 0.0 + + for batch_idx in range(outputs.shape[0]): + num_gt = int(nlabel[batch_idx]) + num_gts += num_gt + if num_gt == 0: + cls_target = outputs.new_zeros((0, self.num_classes)) + reg_target = outputs.new_zeros((0, 4)) + l1_target = outputs.new_zeros((0, 4)) + obj_target = outputs.new_zeros((total_num_anchors, 1)) + fg_mask = outputs.new_zeros(total_num_anchors).bool() + else: + gt_bboxes_per_image = labels[batch_idx, :num_gt, 1:5] + gt_classes = labels[batch_idx, :num_gt, 0] + bboxes_preds_per_image = bbox_preds[batch_idx] + + try: + ( + gt_matched_classes, + fg_mask, + pred_ious_this_matching, + matched_gt_inds, + num_fg_img, + ) = self.get_assignments( # noqa + batch_idx, + num_gt, + total_num_anchors, + gt_bboxes_per_image, + gt_classes, + 
bboxes_preds_per_image, + expanded_strides, + x_shifts, + y_shifts, + cls_preds, + bbox_preds, + obj_preds, + labels, + imgs, + ) + except RuntimeError: + logger.info( + "OOM RuntimeError is raised due to the huge memory cost during label assignment. \ + CPU mode is applied in this batch. If you want to avoid this issue, \ + try to reduce the batch size or image size." + ) + print("OOM RuntimeError is raised due to the huge memory cost during label assignment. \ + CPU mode is applied in this batch. If you want to avoid this issue, \ + try to reduce the batch size or image size.") + torch.cuda.empty_cache() + ( + gt_matched_classes, + fg_mask, + pred_ious_this_matching, + matched_gt_inds, + num_fg_img, + ) = self.get_assignments( # noqa + batch_idx, + num_gt, + total_num_anchors, + gt_bboxes_per_image, + gt_classes, + bboxes_preds_per_image, + expanded_strides, + x_shifts, + y_shifts, + cls_preds, + bbox_preds, + obj_preds, + labels, + imgs, + "cpu", + ) + + + torch.cuda.empty_cache() + num_fg += num_fg_img + + cls_target = F.one_hot( + gt_matched_classes.to(torch.int64), self.num_classes + ) * pred_ious_this_matching.unsqueeze(-1) + obj_target = fg_mask.unsqueeze(-1) + reg_target = gt_bboxes_per_image[matched_gt_inds] + + if self.use_l1: + l1_target = self.get_l1_target( + outputs.new_zeros((num_fg_img, 4)), + gt_bboxes_per_image[matched_gt_inds], + expanded_strides[0][fg_mask], + x_shifts=x_shifts[0][fg_mask], + y_shifts=y_shifts[0][fg_mask], + ) + + cls_targets.append(cls_target) + reg_targets.append(reg_target) + obj_targets.append(obj_target.to(dtype)) + fg_masks.append(fg_mask) + if self.use_l1: + l1_targets.append(l1_target) + + cls_targets = torch.cat(cls_targets, 0) + reg_targets = torch.cat(reg_targets, 0) + obj_targets = torch.cat(obj_targets, 0) + fg_masks = torch.cat(fg_masks, 0) + if self.use_l1: + l1_targets = torch.cat(l1_targets, 0) + + num_fg = max(num_fg, 1) + loss_iou = ( + self.iou_loss(bbox_preds.view(-1, 4)[fg_masks], reg_targets) + ).sum() / num_fg + loss_obj = ( + self.bcewithlog_loss(obj_preds.view(-1, 1), obj_targets) + ).sum() / num_fg + loss_cls = ( + self.bcewithlog_loss( + cls_preds.view(-1, self.num_classes)[fg_masks], cls_targets + ) + ).sum() / num_fg + if self.use_l1: + loss_l1 = ( + self.l1_loss(origin_preds.view(-1, 4)[fg_masks], l1_targets) + ).sum() / num_fg + else: + loss_l1 = 0.0 + + reg_weight = 5.0 + loss = reg_weight * loss_iou + loss_obj + loss_cls + loss_l1 + + return ( + loss, + reg_weight * loss_iou, + loss_obj, + loss_cls, + loss_l1, + num_fg / max(num_gts, 1), + ) + + def get_l1_target(self, l1_target, gt, stride, x_shifts, y_shifts, eps=1e-8): + l1_target[:, 0] = gt[:, 0] / stride - x_shifts + l1_target[:, 1] = gt[:, 1] / stride - y_shifts + l1_target[:, 2] = torch.log(gt[:, 2] / stride + eps) + l1_target[:, 3] = torch.log(gt[:, 3] / stride + eps) + return l1_target + + @torch.no_grad() + def get_assignments( + self, + batch_idx, + num_gt, + total_num_anchors, + gt_bboxes_per_image, + gt_classes, + bboxes_preds_per_image, + expanded_strides, + x_shifts, + y_shifts, + cls_preds, + bbox_preds, + obj_preds, + labels, + imgs, + mode="gpu", + ): + + if mode == "cpu": + print("------------CPU Mode for This Batch-------------") + gt_bboxes_per_image = gt_bboxes_per_image.cpu().float() + bboxes_preds_per_image = bboxes_preds_per_image.cpu().float() + gt_classes = gt_classes.cpu().float() + expanded_strides = expanded_strides.cpu().float() + x_shifts = x_shifts.cpu() + y_shifts = y_shifts.cpu() + + img_size = imgs.shape[2:] + fg_mask, 
is_in_boxes_and_center = self.get_in_boxes_info( + gt_bboxes_per_image, + expanded_strides, + x_shifts, + y_shifts, + total_num_anchors, + num_gt, + img_size + ) + + bboxes_preds_per_image = bboxes_preds_per_image[fg_mask] + cls_preds_ = cls_preds[batch_idx][fg_mask] + obj_preds_ = obj_preds[batch_idx][fg_mask] + num_in_boxes_anchor = bboxes_preds_per_image.shape[0] + + if mode == "cpu": + gt_bboxes_per_image = gt_bboxes_per_image.cpu() + bboxes_preds_per_image = bboxes_preds_per_image.cpu() + + pair_wise_ious = bboxes_iou(gt_bboxes_per_image, bboxes_preds_per_image, False) + + gt_cls_per_image = ( + F.one_hot(gt_classes.to(torch.int64), self.num_classes) + .float() + .unsqueeze(1) + .repeat(1, num_in_boxes_anchor, 1) + ) + pair_wise_ious_loss = -torch.log(pair_wise_ious + 1e-8) + + if mode == "cpu": + cls_preds_, obj_preds_ = cls_preds_.cpu(), obj_preds_.cpu() + + with torch.cuda.amp.autocast(enabled=False): + cls_preds_ = ( + cls_preds_.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_() + * obj_preds_.float().unsqueeze(0).repeat(num_gt, 1, 1).sigmoid_() + ) + pair_wise_cls_loss = F.binary_cross_entropy( + cls_preds_.sqrt_(), gt_cls_per_image, reduction="none" + ).sum(-1) + del cls_preds_ + + cost = ( + pair_wise_cls_loss + + 3.0 * pair_wise_ious_loss + + 100000.0 * (~is_in_boxes_and_center) + ) + + ( + num_fg, + gt_matched_classes, + pred_ious_this_matching, + matched_gt_inds, + ) = self.dynamic_k_matching(cost, pair_wise_ious, gt_classes, num_gt, fg_mask) + del pair_wise_cls_loss, cost, pair_wise_ious, pair_wise_ious_loss + + if mode == "cpu": + gt_matched_classes = gt_matched_classes.cuda() + fg_mask = fg_mask.cuda() + pred_ious_this_matching = pred_ious_this_matching.cuda() + matched_gt_inds = matched_gt_inds.cuda() + + return ( + gt_matched_classes, + fg_mask, + pred_ious_this_matching, + matched_gt_inds, + num_fg, + ) + + def get_in_boxes_info( + self, + gt_bboxes_per_image, + expanded_strides, + x_shifts, + y_shifts, + total_num_anchors, + num_gt, + img_size + ): + expanded_strides_per_image = expanded_strides[0] + x_shifts_per_image = x_shifts[0] * expanded_strides_per_image + y_shifts_per_image = y_shifts[0] * expanded_strides_per_image + x_centers_per_image = ( + (x_shifts_per_image + 0.5 * expanded_strides_per_image) + .unsqueeze(0) + .repeat(num_gt, 1) + ) # [n_anchor] -> [n_gt, n_anchor] + y_centers_per_image = ( + (y_shifts_per_image + 0.5 * expanded_strides_per_image) + .unsqueeze(0) + .repeat(num_gt, 1) + ) + + gt_bboxes_per_image_l = ( + (gt_bboxes_per_image[:, 0] - 0.5 * gt_bboxes_per_image[:, 2]) + .unsqueeze(1) + .repeat(1, total_num_anchors) + ) + gt_bboxes_per_image_r = ( + (gt_bboxes_per_image[:, 0] + 0.5 * gt_bboxes_per_image[:, 2]) + .unsqueeze(1) + .repeat(1, total_num_anchors) + ) + gt_bboxes_per_image_t = ( + (gt_bboxes_per_image[:, 1] - 0.5 * gt_bboxes_per_image[:, 3]) + .unsqueeze(1) + .repeat(1, total_num_anchors) + ) + gt_bboxes_per_image_b = ( + (gt_bboxes_per_image[:, 1] + 0.5 * gt_bboxes_per_image[:, 3]) + .unsqueeze(1) + .repeat(1, total_num_anchors) + ) + + b_l = x_centers_per_image - gt_bboxes_per_image_l + b_r = gt_bboxes_per_image_r - x_centers_per_image + b_t = y_centers_per_image - gt_bboxes_per_image_t + b_b = gt_bboxes_per_image_b - y_centers_per_image + bbox_deltas = torch.stack([b_l, b_t, b_r, b_b], 2) + + is_in_boxes = bbox_deltas.min(dim=-1).values > 0.0 + is_in_boxes_all = is_in_boxes.sum(dim=0) > 0 + # in fixed center + + center_radius = 2.5 + # clip center inside image + gt_bboxes_per_image_clip = gt_bboxes_per_image[:, 0:2].clone() + 
gt_bboxes_per_image_clip[:, 0] = torch.clamp(gt_bboxes_per_image_clip[:, 0], min=0, max=img_size[1]) + gt_bboxes_per_image_clip[:, 1] = torch.clamp(gt_bboxes_per_image_clip[:, 1], min=0, max=img_size[0]) + + gt_bboxes_per_image_l = (gt_bboxes_per_image_clip[:, 0]).unsqueeze(1).repeat( + 1, total_num_anchors + ) - center_radius * expanded_strides_per_image.unsqueeze(0) + gt_bboxes_per_image_r = (gt_bboxes_per_image_clip[:, 0]).unsqueeze(1).repeat( + 1, total_num_anchors + ) + center_radius * expanded_strides_per_image.unsqueeze(0) + gt_bboxes_per_image_t = (gt_bboxes_per_image_clip[:, 1]).unsqueeze(1).repeat( + 1, total_num_anchors + ) - center_radius * expanded_strides_per_image.unsqueeze(0) + gt_bboxes_per_image_b = (gt_bboxes_per_image_clip[:, 1]).unsqueeze(1).repeat( + 1, total_num_anchors + ) + center_radius * expanded_strides_per_image.unsqueeze(0) + + c_l = x_centers_per_image - gt_bboxes_per_image_l + c_r = gt_bboxes_per_image_r - x_centers_per_image + c_t = y_centers_per_image - gt_bboxes_per_image_t + c_b = gt_bboxes_per_image_b - y_centers_per_image + center_deltas = torch.stack([c_l, c_t, c_r, c_b], 2) + is_in_centers = center_deltas.min(dim=-1).values > 0.0 + is_in_centers_all = is_in_centers.sum(dim=0) > 0 + + # in boxes and in centers + is_in_boxes_anchor = is_in_boxes_all | is_in_centers_all + + is_in_boxes_and_center = ( + is_in_boxes[:, is_in_boxes_anchor] & is_in_centers[:, is_in_boxes_anchor] + ) + del gt_bboxes_per_image_clip + return is_in_boxes_anchor, is_in_boxes_and_center + + def dynamic_k_matching(self, cost, pair_wise_ious, gt_classes, num_gt, fg_mask): + # Dynamic K + # --------------------------------------------------------------- + matching_matrix = torch.zeros_like(cost) + + ious_in_boxes_matrix = pair_wise_ious + n_candidate_k = min(10, ious_in_boxes_matrix.size(1)) + topk_ious, _ = torch.topk(ious_in_boxes_matrix, n_candidate_k, dim=1) + dynamic_ks = torch.clamp(topk_ious.sum(1).int(), min=1) + for gt_idx in range(num_gt): + _, pos_idx = torch.topk( + cost[gt_idx], k=dynamic_ks[gt_idx].item(), largest=False + ) + matching_matrix[gt_idx][pos_idx] = 1.0 + + del topk_ious, dynamic_ks, pos_idx + + anchor_matching_gt = matching_matrix.sum(0) + if (anchor_matching_gt > 1).sum() > 0: + cost_min, cost_argmin = torch.min(cost[:, anchor_matching_gt > 1], dim=0) + matching_matrix[:, anchor_matching_gt > 1] *= 0.0 + matching_matrix[cost_argmin, anchor_matching_gt > 1] = 1.0 + fg_mask_inboxes = matching_matrix.sum(0) > 0.0 + num_fg = fg_mask_inboxes.sum().item() + + fg_mask[fg_mask.clone()] = fg_mask_inboxes + + matched_gt_inds = matching_matrix[:, fg_mask_inboxes].argmax(0) + gt_matched_classes = gt_classes[matched_gt_inds] + + pred_ious_this_matching = (matching_matrix * pair_wise_ious).sum(0)[ + fg_mask_inboxes + ] + return num_fg, gt_matched_classes, pred_ious_this_matching, matched_gt_inds diff --git a/yolox/models/yolo_pafpn.py b/yolox/models/yolo_pafpn.py new file mode 100644 index 0000000000000000000000000000000000000000..c419de3204f466c81f7e50fe5c7ffd17e51d63b3 --- /dev/null +++ b/yolox/models/yolo_pafpn.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + +import torch +import torch.nn as nn + +from .darknet import CSPDarknet +from .network_blocks import BaseConv, CSPLayer, DWConv + + +class YOLOPAFPN(nn.Module): + """ + YOLOv3 model. Darknet 53 is the default backbone of this model. 
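+    A PAN-style bottom-up path is added on top of the top-down FPN, and the
+    head receives features at strides 8, 16 and 32.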
+ """ + + def __init__( + self, + depth=1.0, + width=1.0, + in_features=("dark3", "dark4", "dark5"), + in_channels=[256, 512, 1024], + depthwise=False, + act="silu", + ): + super().__init__() + self.backbone = CSPDarknet(depth, width, depthwise=depthwise, act=act) + self.in_features = in_features + self.in_channels = in_channels + Conv = DWConv if depthwise else BaseConv + + self.upsample = nn.Upsample(scale_factor=2, mode="nearest") + self.lateral_conv0 = BaseConv( + int(in_channels[2] * width), int(in_channels[1] * width), 1, 1, act=act + ) + self.C3_p4 = CSPLayer( + int(2 * in_channels[1] * width), + int(in_channels[1] * width), + round(3 * depth), + False, + depthwise=depthwise, + act=act, + ) # cat + + self.reduce_conv1 = BaseConv( + int(in_channels[1] * width), int(in_channels[0] * width), 1, 1, act=act + ) + self.C3_p3 = CSPLayer( + int(2 * in_channels[0] * width), + int(in_channels[0] * width), + round(3 * depth), + False, + depthwise=depthwise, + act=act, + ) + + # bottom-up conv + self.bu_conv2 = Conv( + int(in_channels[0] * width), int(in_channels[0] * width), 3, 2, act=act + ) + self.C3_n3 = CSPLayer( + int(2 * in_channels[0] * width), + int(in_channels[1] * width), + round(3 * depth), + False, + depthwise=depthwise, + act=act, + ) + + # bottom-up conv + self.bu_conv1 = Conv( + int(in_channels[1] * width), int(in_channels[1] * width), 3, 2, act=act + ) + self.C3_n4 = CSPLayer( + int(2 * in_channels[1] * width), + int(in_channels[2] * width), + round(3 * depth), + False, + depthwise=depthwise, + act=act, + ) + + def forward(self, input): + """ + Args: + inputs: input images. + + Returns: + Tuple[Tensor]: FPN feature. + """ + + # backbone + out_features = self.backbone(input) + features = [out_features[f] for f in self.in_features] + [x2, x1, x0] = features + + fpn_out0 = self.lateral_conv0(x0) # 1024->512/32 + f_out0 = self.upsample(fpn_out0) # 512/16 + f_out0 = torch.cat([f_out0, x1], 1) # 512->1024/16 + f_out0 = self.C3_p4(f_out0) # 1024->512/16 + + fpn_out1 = self.reduce_conv1(f_out0) # 512->256/16 + f_out1 = self.upsample(fpn_out1) # 256/8 + f_out1 = torch.cat([f_out1, x2], 1) # 256->512/8 + pan_out2 = self.C3_p3(f_out1) # 512->256/8 + + p_out1 = self.bu_conv2(pan_out2) # 256->256/16 + p_out1 = torch.cat([p_out1, fpn_out1], 1) # 256->512/16 + pan_out1 = self.C3_n3(p_out1) # 512->512/16 + + p_out0 = self.bu_conv1(pan_out1) # 512->512/32 + p_out0 = torch.cat([p_out0, fpn_out0], 1) # 512->1024/32 + pan_out0 = self.C3_n4(p_out0) # 1024->1024/32 + + outputs = (pan_out2, pan_out1, pan_out0) + return outputs diff --git a/yolox/models/yolox.py b/yolox/models/yolox.py new file mode 100644 index 0000000000000000000000000000000000000000..2f1fa1b34baaf6e0241cf289a2f73db48b33d914 --- /dev/null +++ b/yolox/models/yolox.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + +import torch.nn as nn + +from .yolo_head import YOLOXHead +from .yolo_pafpn import YOLOPAFPN + + +class YOLOX(nn.Module): + """ + YOLOX model module. The module list is defined by create_yolov3_modules function. + The network returns loss values from three YOLO layers during training + and detection results during test. 
+ """ + + def __init__(self, backbone=None, head=None): + super().__init__() + if backbone is None: + backbone = YOLOPAFPN() + if head is None: + head = YOLOXHead(80) + + self.backbone = backbone + self.head = head + + def forward(self, x, targets=None): + # fpn output content features of [dark3, dark4, dark5] + fpn_outs = self.backbone(x) + + if self.training: + assert targets is not None + loss, iou_loss, conf_loss, cls_loss, l1_loss, num_fg = self.head( + fpn_outs, targets, x + ) + outputs = { + "total_loss": loss, + "iou_loss": iou_loss, + "l1_loss": l1_loss, + "conf_loss": conf_loss, + "cls_loss": cls_loss, + "num_fg": num_fg, + } + else: + outputs = self.head(fpn_outs) + + return outputs diff --git a/yolox/motdt_tracker/basetrack.py b/yolox/motdt_tracker/basetrack.py new file mode 100644 index 0000000000000000000000000000000000000000..88b16eea14d1cc6f238b7c56becd6e754fbea55c --- /dev/null +++ b/yolox/motdt_tracker/basetrack.py @@ -0,0 +1,56 @@ +import numpy as np +from collections import OrderedDict + + +class TrackState(object): + New = 0 + Tracked = 1 + Lost = 2 + Removed = 3 + Replaced = 4 + + +class BaseTrack(object): + _count = 0 + + track_id = 0 + is_activated = False + state = TrackState.New + + history = OrderedDict() + features = [] + curr_feature = None + score = 0 + start_frame = 0 + frame_id = 0 + time_since_update = 0 + + # multi-camera + location = (np.inf, np.inf) + + @property + def end_frame(self): + return self.frame_id + + @staticmethod + def next_id(): + BaseTrack._count += 1 + return BaseTrack._count + + def activate(self, *args): + raise NotImplementedError + + def predict(self): + raise NotImplementedError + + def update(self, *args, **kwargs): + raise NotImplementedError + + def mark_lost(self): + self.state = TrackState.Lost + + def mark_removed(self): + self.state = TrackState.Removed + + def mark_replaced(self): + self.state = TrackState.Replaced diff --git a/yolox/motdt_tracker/kalman_filter.py b/yolox/motdt_tracker/kalman_filter.py new file mode 100644 index 0000000000000000000000000000000000000000..deda8a26292b81bc6512a8f6145afabde6c16d7a --- /dev/null +++ b/yolox/motdt_tracker/kalman_filter.py @@ -0,0 +1,270 @@ +# vim: expandtab:ts=4:sw=4 +import numpy as np +import scipy.linalg + + +""" +Table for the 0.95 quantile of the chi-square distribution with N degrees of +freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv +function and used as Mahalanobis gating threshold. +""" +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919} + + +class KalmanFilter(object): + """ + A simple Kalman filter for tracking bounding boxes in image space. + + The 8-dimensional state space + + x, y, a, h, vx, vy, va, vh + + contains the bounding box center position (x, y), aspect ratio a, height h, + and their respective velocities. + + Object motion follows a constant velocity model. The bounding box location + (x, y, a, h) is taken as direct observation of the state space (linear + observation model). + + """ + + def __init__(self): + ndim, dt = 4, 1. + + # Create Kalman filter model matrices. + self._motion_mat = np.eye(2 * ndim, 2 * ndim) + for i in range(ndim): + self._motion_mat[i, ndim + i] = dt + self._update_mat = np.eye(ndim, 2 * ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. These weights control the amount of uncertainty in + # the model. This is a bit hacky. + self._std_weight_position = 1. 
/ 20 + self._std_weight_velocity = 1. / 160 + + def initiate(self, measurement): + """Create track from unassociated measurement. + + Parameters + ---------- + measurement : ndarray + Bounding box coordinates (x, y, a, h) with center position (x, y), + aspect ratio a, and height h. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are initialized + to 0 mean. + + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + mean = np.r_[mean_pos, mean_vel] + + std = [ + 2 * self._std_weight_position * measurement[3], + 2 * self._std_weight_position * measurement[3], + 1e-2, + 2 * self._std_weight_position * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 1e-5, + 10 * self._std_weight_velocity * measurement[3]] + covariance = np.diag(np.square(std)) + return mean, covariance + + def predict(self, mean, covariance): + """Run Kalman filter prediction step. + + Parameters + ---------- + mean : ndarray + The 8 dimensional mean vector of the object state at the previous + time step. + covariance : ndarray + The 8x8 dimensional covariance matrix of the object state at the + previous time step. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + + """ + std_pos = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-2, + self._std_weight_position * mean[3]] + std_vel = [ + self._std_weight_velocity * mean[3], + self._std_weight_velocity * mean[3], + 1e-5, + self._std_weight_velocity * mean[3]] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + #mean = np.dot(self._motion_mat, mean) + mean = np.dot(mean, self._motion_mat.T) + covariance = np.linalg.multi_dot(( + self._motion_mat, covariance, self._motion_mat.T)) + motion_cov + + return mean, covariance + + def project(self, mean, covariance): + """Project state distribution to measurement space. + + Parameters + ---------- + mean : ndarray + The state's mean vector (8 dimensional array). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + + Returns + ------- + (ndarray, ndarray) + Returns the projected mean and covariance matrix of the given state + estimate. + + """ + std = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-1, + self._std_weight_position * mean[3]] + innovation_cov = np.diag(np.square(std)) + + mean = np.dot(self._update_mat, mean) + covariance = np.linalg.multi_dot(( + self._update_mat, covariance, self._update_mat.T)) + return mean, covariance + innovation_cov + + def multi_predict(self, mean, covariance): + """Run Kalman filter prediction step (Vectorized version). + Parameters + ---------- + mean : ndarray + The Nx8 dimensional mean matrix of the object states at the previous + time step. + covariance : ndarray + The Nx8x8 dimensional covariance matrics of the object states at the + previous time step. + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. 
+ """ + std_pos = [ + self._std_weight_position * mean[:, 3], + self._std_weight_position * mean[:, 3], + 1e-2 * np.ones_like(mean[:, 3]), + self._std_weight_position * mean[:, 3]] + std_vel = [ + self._std_weight_velocity * mean[:, 3], + self._std_weight_velocity * mean[:, 3], + 1e-5 * np.ones_like(mean[:, 3]), + self._std_weight_velocity * mean[:, 3]] + sqr = np.square(np.r_[std_pos, std_vel]).T + + motion_cov = [] + for i in range(len(mean)): + motion_cov.append(np.diag(sqr[i])) + motion_cov = np.asarray(motion_cov) + + mean = np.dot(mean, self._motion_mat.T) + left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) + covariance = np.dot(left, self._motion_mat.T) + motion_cov + + return mean, covariance + + def update(self, mean, covariance, measurement): + """Run Kalman filter correction step. + + Parameters + ---------- + mean : ndarray + The predicted state's mean vector (8 dimensional). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + measurement : ndarray + The 4 dimensional measurement vector (x, y, a, h), where (x, y) + is the center position, a the aspect ratio, and h the height of the + bounding box. + + Returns + ------- + (ndarray, ndarray) + Returns the measurement-corrected state distribution. + + """ + projected_mean, projected_cov = self.project(mean, covariance) + + chol_factor, lower = scipy.linalg.cho_factor( + projected_cov, lower=True, check_finite=False) + kalman_gain = scipy.linalg.cho_solve( + (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, + check_finite=False).T + innovation = measurement - projected_mean + + new_mean = mean + np.dot(innovation, kalman_gain.T) + new_covariance = covariance - np.linalg.multi_dot(( + kalman_gain, projected_cov, kalman_gain.T)) + return new_mean, new_covariance + + def gating_distance(self, mean, covariance, measurements, + only_position=False, metric='maha'): + """Compute gating distance between state distribution and measurements. + A suitable distance threshold can be obtained from `chi2inv95`. If + `only_position` is False, the chi-square distribution has 4 degrees of + freedom, otherwise 2. + Parameters + ---------- + mean : ndarray + Mean vector over the state distribution (8 dimensional). + covariance : ndarray + Covariance of the state distribution (8x8 dimensional). + measurements : ndarray + An Nx4 dimensional matrix of N measurements, each in + format (x, y, a, h) where (x, y) is the bounding box center + position, a the aspect ratio, and h the height. + only_position : Optional[bool] + If True, distance computation is done with respect to the bounding + box center position only. + Returns + ------- + ndarray + Returns an array of length N, where the i-th element contains the + squared Mahalanobis distance between (mean, covariance) and + `measurements[i]`. 
+ """ + mean, covariance = self.project(mean, covariance) + if only_position: + mean, covariance = mean[:2], covariance[:2, :2] + measurements = measurements[:, :2] + + d = measurements - mean + if metric == 'gaussian': + return np.sum(d * d, axis=1) + elif metric == 'maha': + cholesky_factor = np.linalg.cholesky(covariance) + z = scipy.linalg.solve_triangular( + cholesky_factor, d.T, lower=True, check_finite=False, + overwrite_b=True) + squared_maha = np.sum(z * z, axis=0) + return squared_maha + else: + raise ValueError('invalid distance metric') \ No newline at end of file diff --git a/yolox/motdt_tracker/matching.py b/yolox/motdt_tracker/matching.py new file mode 100644 index 0000000000000000000000000000000000000000..01d07da874a793c06eecba172d1e44c7a368234b --- /dev/null +++ b/yolox/motdt_tracker/matching.py @@ -0,0 +1,116 @@ +import cv2 +import numpy as np +import lap +from scipy.spatial.distance import cdist + +from cython_bbox import bbox_overlaps as bbox_ious +from yolox.motdt_tracker import kalman_filter + + +def _indices_to_matches(cost_matrix, indices, thresh): + matched_cost = cost_matrix[tuple(zip(*indices))] + matched_mask = (matched_cost <= thresh) + + matches = indices[matched_mask] + unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0])) + unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1])) + + return matches, unmatched_a, unmatched_b + + +def linear_assignment(cost_matrix, thresh): + if cost_matrix.size == 0: + return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) + matches, unmatched_a, unmatched_b = [], [], [] + cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) + for ix, mx in enumerate(x): + if mx >= 0: + matches.append([ix, mx]) + unmatched_a = np.where(x < 0)[0] + unmatched_b = np.where(y < 0)[0] + matches = np.asarray(matches) + return matches, unmatched_a, unmatched_b + + +def ious(atlbrs, btlbrs): + """ + Compute cost based on IoU + :type atlbrs: list[tlbr] | np.ndarray + :type atlbrs: list[tlbr] | np.ndarray + :rtype ious np.ndarray + """ + ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) + if ious.size == 0: + return ious + + ious = bbox_ious( + np.ascontiguousarray(atlbrs, dtype=np.float), + np.ascontiguousarray(btlbrs, dtype=np.float) + ) + + return ious + + +def iou_distance(atracks, btracks): + """ + Compute cost based on IoU + :type atracks: list[STrack] + :type btracks: list[STrack] + :rtype cost_matrix np.ndarray + """ + atlbrs = [track.tlbr for track in atracks] + btlbrs = [track.tlbr for track in btracks] + _ious = ious(atlbrs, btlbrs) + cost_matrix = 1 - _ious + + return cost_matrix + + +def nearest_reid_distance(tracks, detections, metric='cosine'): + """ + Compute cost based on ReID features + :type tracks: list[STrack] + :type detections: list[BaseTrack] + :rtype cost_matrix np.ndarray + """ + cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) + if cost_matrix.size == 0: + return cost_matrix + + det_features = np.asarray([track.curr_feature for track in detections], dtype=np.float32) + for i, track in enumerate(tracks): + cost_matrix[i, :] = np.maximum(0.0, cdist(track.features, det_features, metric).min(axis=0)) + + return cost_matrix + + +def mean_reid_distance(tracks, detections, metric='cosine'): + """ + Compute cost based on ReID features + :type tracks: list[STrack] + :type detections: list[BaseTrack] + :type metric: str + :rtype cost_matrix np.ndarray + """ + cost_matrix = np.empty((len(tracks), 
len(detections)), dtype=np.float) + if cost_matrix.size == 0: + return cost_matrix + + track_features = np.asarray([track.curr_feature for track in tracks], dtype=np.float32) + det_features = np.asarray([track.curr_feature for track in detections], dtype=np.float32) + cost_matrix = cdist(track_features, det_features, metric) + + return cost_matrix + + +def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = kalman_filter.chi2inv95[gating_dim] + measurements = np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position) + cost_matrix[row, gating_distance > gating_threshold] = np.inf + return cost_matrix \ No newline at end of file diff --git a/yolox/motdt_tracker/motdt_tracker.py b/yolox/motdt_tracker/motdt_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..d82908ea4285f8ef709e65788bff43ca9f1fd45e --- /dev/null +++ b/yolox/motdt_tracker/motdt_tracker.py @@ -0,0 +1,358 @@ +import numpy as np +#from numba import jit +from collections import OrderedDict, deque +import itertools +import os +import cv2 +import torch +from torch._C import dtype +import torchvision + +from yolox.motdt_tracker import matching +from .kalman_filter import KalmanFilter +from .reid_model import load_reid_model, extract_reid_features +from yolox.data.dataloading import get_yolox_datadir + +from .basetrack import BaseTrack, TrackState + + +class STrack(BaseTrack): + + def __init__(self, tlwh, score, max_n_features=100, from_det=True): + + # wait activate + self._tlwh = np.asarray(tlwh, dtype=np.float) + self.kalman_filter = None + self.mean, self.covariance = None, None + self.is_activated = False + + self.score = score + self.max_n_features = max_n_features + self.curr_feature = None + self.last_feature = None + self.features = deque([], maxlen=self.max_n_features) + + # classification + self.from_det = from_det + self.tracklet_len = 0 + self.time_by_tracking = 0 + + # self-tracking + self.tracker = None + + def set_feature(self, feature): + if feature is None: + return False + self.features.append(feature) + self.curr_feature = feature + self.last_feature = feature + # self._p_feature = 0 + return True + + def predict(self): + if self.time_since_update > 0: + self.tracklet_len = 0 + + self.time_since_update += 1 + + mean_state = self.mean.copy() + if self.state != TrackState.Tracked: + mean_state[7] = 0 + self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) + + if self.tracker: + self.tracker.update_roi(self.tlwh) + + def self_tracking(self, image): + tlwh = self.tracker.predict(image) if self.tracker else self.tlwh + return tlwh + + def activate(self, kalman_filter, frame_id, image): + """Start a new tracklet""" + self.kalman_filter = kalman_filter # type: KalmanFilter + self.track_id = self.next_id() + # cx, cy, aspect_ratio, height, dx, dy, da, dh + self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) + + # self.tracker = sot.SingleObjectTracker() + # self.tracker.init(image, self.tlwh) + + del self._tlwh + + self.time_since_update = 0 + self.time_by_tracking = 0 + self.tracklet_len = 0 + self.state = TrackState.Tracked + # self.is_activated = True + self.frame_id = frame_id + self.start_frame = frame_id + + def re_activate(self, new_track, frame_id, image, 
new_id=False): + # self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(new_track.tlwh)) + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) + ) + self.time_since_update = 0 + self.time_by_tracking = 0 + self.tracklet_len = 0 + self.state = TrackState.Tracked + self.is_activated = True + self.frame_id = frame_id + if new_id: + self.track_id = self.next_id() + + self.set_feature(new_track.curr_feature) + + def update(self, new_track, frame_id, image, update_feature=True): + """ + Update a matched track + :type new_track: STrack + :type frame_id: int + :type update_feature: bool + :return: + """ + self.frame_id = frame_id + self.time_since_update = 0 + if new_track.from_det: + self.time_by_tracking = 0 + else: + self.time_by_tracking += 1 + self.tracklet_len += 1 + + new_tlwh = new_track.tlwh + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) + self.state = TrackState.Tracked + self.is_activated = True + + self.score = new_track.score + + if update_feature: + self.set_feature(new_track.curr_feature) + if self.tracker: + self.tracker.update(image, self.tlwh) + + @property + #@jit + def tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + """ + if self.mean is None: + return self._tlwh.copy() + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + @property + #@jit + def tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + @staticmethod + #@jit + def tlwh_to_xyah(tlwh): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. 
+ """ + ret = np.asarray(tlwh).copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + def to_xyah(self): + return self.tlwh_to_xyah(self.tlwh) + + def tracklet_score(self): + # score = (1 - np.exp(-0.6 * self.hit_streak)) * np.exp(-0.03 * self.time_by_tracking) + + score = max(0, 1 - np.log(1 + 0.05 * self.time_by_tracking)) * (self.tracklet_len - self.time_by_tracking > 2) + # score = max(0, 1 - np.log(1 + 0.05 * self.n_tracking)) * (1 - np.exp(-0.6 * self.hit_streak)) + return score + + def __repr__(self): + return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) + + +class OnlineTracker(object): + + def __init__(self, model_folder, min_cls_score=0.4, min_ap_dist=0.8, max_time_lost=30, use_tracking=True, use_refind=True): + + self.min_cls_score = min_cls_score + self.min_ap_dist = min_ap_dist + self.max_time_lost = max_time_lost + + self.kalman_filter = KalmanFilter() + + self.tracked_stracks = [] # type: list[STrack] + self.lost_stracks = [] # type: list[STrack] + self.removed_stracks = [] # type: list[STrack] + + self.use_refind = use_refind + self.use_tracking = use_tracking + self.classifier = None + self.reid_model = load_reid_model(model_folder) + + self.frame_id = 0 + + def update(self, output_results, img_info, img_size, img_file_name): + img_file_name = os.path.join(get_yolox_datadir(), 'mot', 'train', img_file_name) + image = cv2.imread(img_file_name) + # post process detections + output_results = output_results.cpu().numpy() + confidences = output_results[:, 4] * output_results[:, 5] + + bboxes = output_results[:, :4] # x1y1x2y2 + img_h, img_w = img_info[0], img_info[1] + scale = min(img_size[0] / float(img_h), img_size[1] / float(img_w)) + bboxes /= scale + bbox_xyxy = bboxes + tlwhs = self._xyxy_to_tlwh_array(bbox_xyxy) + remain_inds = confidences > self.min_cls_score + tlwhs = tlwhs[remain_inds] + det_scores = confidences[remain_inds] + + self.frame_id += 1 + + activated_starcks = [] + refind_stracks = [] + lost_stracks = [] + removed_stracks = [] + + """step 1: prediction""" + for strack in itertools.chain(self.tracked_stracks, self.lost_stracks): + strack.predict() + + """step 2: scoring and selection""" + if det_scores is None: + det_scores = np.ones(len(tlwhs), dtype=float) + detections = [STrack(tlwh, score, from_det=True) for tlwh, score in zip(tlwhs, det_scores)] + if self.use_tracking: + tracks = [STrack(t.self_tracking(image), t.score * t.tracklet_score(), from_det=False) + for t in itertools.chain(self.tracked_stracks, self.lost_stracks) if t.is_activated] + detections.extend(tracks) + rois = np.asarray([d.tlbr for d in detections], dtype=np.float32) + scores = np.asarray([d.score for d in detections], dtype=np.float32) + # nms + if len(detections) > 0: + nms_out_index = torchvision.ops.batched_nms( + torch.from_numpy(rois), + torch.from_numpy(scores.reshape(-1)).to(torch.from_numpy(rois).dtype), + torch.zeros_like(torch.from_numpy(scores.reshape(-1))), + 0.7, + ) + keep = nms_out_index.numpy() + mask = np.zeros(len(rois), dtype=np.bool) + mask[keep] = True + keep = np.where(mask & (scores >= self.min_cls_score))[0] + detections = [detections[i] for i in keep] + scores = scores[keep] + for d, score in zip(detections, scores): + d.score = score + pred_dets = [d for d in detections if not d.from_det] + detections = [d for d in detections if d.from_det] + + # set features + tlbrs = [det.tlbr for det in detections] + features = extract_reid_features(self.reid_model, image, tlbrs) + features = features.cpu().numpy() + for i, det in 
enumerate(detections): + det.set_feature(features[i]) + + """step 3: association for tracked""" + # matching for tracked targets + unconfirmed = [] + tracked_stracks = [] # type: list[STrack] + for track in self.tracked_stracks: + if not track.is_activated: + unconfirmed.append(track) + else: + tracked_stracks.append(track) + + dists = matching.nearest_reid_distance(tracked_stracks, detections, metric='euclidean') + dists = matching.gate_cost_matrix(self.kalman_filter, dists, tracked_stracks, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.min_ap_dist) + for itracked, idet in matches: + tracked_stracks[itracked].update(detections[idet], self.frame_id, image) + + # matching for missing targets + detections = [detections[i] for i in u_detection] + dists = matching.nearest_reid_distance(self.lost_stracks, detections, metric='euclidean') + dists = matching.gate_cost_matrix(self.kalman_filter, dists, self.lost_stracks, detections) + matches, u_lost, u_detection = matching.linear_assignment(dists, thresh=self.min_ap_dist) + for ilost, idet in matches: + track = self.lost_stracks[ilost] # type: STrack + det = detections[idet] + track.re_activate(det, self.frame_id, image, new_id=not self.use_refind) + refind_stracks.append(track) + + # remaining tracked + # tracked + len_det = len(u_detection) + detections = [detections[i] for i in u_detection] + pred_dets + r_tracked_stracks = [tracked_stracks[i] for i in u_track] + dists = matching.iou_distance(r_tracked_stracks, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=0.5) + for itracked, idet in matches: + r_tracked_stracks[itracked].update(detections[idet], self.frame_id, image, update_feature=True) + for it in u_track: + track = r_tracked_stracks[it] + track.mark_lost() + lost_stracks.append(track) + + # unconfirmed + detections = [detections[i] for i in u_detection if i < len_det] + dists = matching.iou_distance(unconfirmed, detections) + matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) + for itracked, idet in matches: + unconfirmed[itracked].update(detections[idet], self.frame_id, image, update_feature=True) + for it in u_unconfirmed: + track = unconfirmed[it] + track.mark_removed() + removed_stracks.append(track) + + """step 4: init new stracks""" + for inew in u_detection: + track = detections[inew] + if not track.from_det or track.score < 0.6: + continue + track.activate(self.kalman_filter, self.frame_id, image) + activated_starcks.append(track) + + """step 6: update state""" + for track in self.lost_stracks: + if self.frame_id - track.end_frame > self.max_time_lost: + track.mark_removed() + removed_stracks.append(track) + + self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] + self.lost_stracks = [t for t in self.lost_stracks if t.state == TrackState.Lost] # type: list[STrack] + self.tracked_stracks.extend(activated_starcks) + self.tracked_stracks.extend(refind_stracks) + self.lost_stracks.extend(lost_stracks) + self.removed_stracks.extend(removed_stracks) + + # output_stracks = self.tracked_stracks + self.lost_stracks + + # get scores of lost tracks + output_tracked_stracks = [track for track in self.tracked_stracks if track.is_activated] + + output_stracks = output_tracked_stracks + + return output_stracks + + @staticmethod + def _xyxy_to_tlwh_array(bbox_xyxy): + if isinstance(bbox_xyxy, np.ndarray): + bbox_tlwh = bbox_xyxy.copy() + elif isinstance(bbox_xyxy, torch.Tensor): + bbox_tlwh = 
bbox_xyxy.clone() + bbox_tlwh[:, 2] = bbox_xyxy[:, 2] - bbox_xyxy[:, 0] + bbox_tlwh[:, 3] = bbox_xyxy[:, 3] - bbox_xyxy[:, 1] + return bbox_tlwh diff --git a/yolox/motdt_tracker/reid_model.py b/yolox/motdt_tracker/reid_model.py new file mode 100644 index 0000000000000000000000000000000000000000..6ad49e340584452da3eeea487b0f8bf7d7e5db5d --- /dev/null +++ b/yolox/motdt_tracker/reid_model.py @@ -0,0 +1,270 @@ +import cv2 +import numpy as np +import torch +from torch.autograd import Variable +import torch.nn.functional as F +import torch.nn as nn +import pickle +import os +from torch.nn.modules import CrossMapLRN2d as SpatialCrossMapLRN +#from torch.legacy.nn import SpatialCrossMapLRN as SpatialCrossMapLRNOld +from torch.autograd import Function, Variable +from torch.nn import Module + + +def clip_boxes(boxes, im_shape): + """ + Clip boxes to image boundaries. + """ + boxes = np.asarray(boxes) + if boxes.shape[0] == 0: + return boxes + boxes = np.copy(boxes) + # x1 >= 0 + boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) + # y1 >= 0 + boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) + # x2 < im_shape[1] + boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) + # y2 < im_shape[0] + boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) + return boxes + + +def load_net(fname, net, prefix='', load_state_dict=False): + import h5py + with h5py.File(fname, mode='r') as h5f: + h5f_is_module = True + for k in h5f.keys(): + if not str(k).startswith('module.'): + h5f_is_module = False + break + if prefix == '' and not isinstance(net, nn.DataParallel) and h5f_is_module: + prefix = 'module.' + + for k, v in net.state_dict().items(): + k = prefix + k + if k in h5f: + param = torch.from_numpy(np.asarray(h5f[k])) + if v.size() != param.size(): + print('Inconsistent shape: {}, {}'.format(v.size(), param.size())) + else: + v.copy_(param) + else: + print.warning('No layer: {}'.format(k)) + + epoch = h5f.attrs['epoch'] if 'epoch' in h5f.attrs else -1 + + if not load_state_dict: + if 'learning_rates' in h5f.attrs: + lr = h5f.attrs['learning_rates'] + else: + lr = h5f.attrs.get('lr', -1) + lr = np.asarray([lr] if lr > 0 else [], dtype=np.float) + + return epoch, lr + + state_file = fname + '.optimizer_state.pk' + if os.path.isfile(state_file): + with open(state_file, 'rb') as f: + state_dicts = pickle.load(f) + if not isinstance(state_dicts, list): + state_dicts = [state_dicts] + else: + state_dicts = None + return epoch, state_dicts + + +# class SpatialCrossMapLRNFunc(Function): + +# def __init__(self, size, alpha=1e-4, beta=0.75, k=1): +# self.size = size +# self.alpha = alpha +# self.beta = beta +# self.k = k + +# def forward(self, input): +# self.save_for_backward(input) +# self.lrn = SpatialCrossMapLRNOld(self.size, self.alpha, self.beta, self.k) +# self.lrn.type(input.type()) +# return self.lrn.forward(input) + +# def backward(self, grad_output): +# input, = self.saved_tensors +# return self.lrn.backward(input, grad_output) + + +# # use this one instead +# class SpatialCrossMapLRN(Module): +# def __init__(self, size, alpha=1e-4, beta=0.75, k=1): +# super(SpatialCrossMapLRN, self).__init__() +# self.size = size +# self.alpha = alpha +# self.beta = beta +# self.k = k + +# def forward(self, input): +# return SpatialCrossMapLRNFunc(self.size, self.alpha, self.beta, self.k)(input) + + +class Inception(nn.Module): + def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes): + super(Inception, 
self).__init__() + # 1x1 conv branch + self.b1 = nn.Sequential( + nn.Conv2d(in_planes, n1x1, kernel_size=1), + nn.ReLU(True), + ) + + # 1x1 conv -> 3x3 conv branch + self.b2 = nn.Sequential( + nn.Conv2d(in_planes, n3x3red, kernel_size=1), + nn.ReLU(True), + nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1), + nn.ReLU(True), + ) + + # 1x1 conv -> 5x5 conv branch + self.b3 = nn.Sequential( + nn.Conv2d(in_planes, n5x5red, kernel_size=1), + nn.ReLU(True), + + nn.Conv2d(n5x5red, n5x5, kernel_size=5, padding=2), + nn.ReLU(True), + ) + + # 3x3 pool -> 1x1 conv branch + self.b4 = nn.Sequential( + nn.MaxPool2d(3, stride=1, padding=1), + + nn.Conv2d(in_planes, pool_planes, kernel_size=1), + nn.ReLU(True), + ) + + def forward(self, x): + y1 = self.b1(x) + y2 = self.b2(x) + y3 = self.b3(x) + y4 = self.b4(x) + return torch.cat([y1,y2,y3,y4], 1) + + +class GoogLeNet(nn.Module): + + output_channels = 832 + + def __init__(self): + super(GoogLeNet, self).__init__() + self.pre_layers = nn.Sequential( + nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3), + nn.ReLU(True), + + nn.MaxPool2d(3, stride=2, ceil_mode=True), + SpatialCrossMapLRN(5), + + nn.Conv2d(64, 64, 1), + nn.ReLU(True), + + nn.Conv2d(64, 192, 3, padding=1), + nn.ReLU(True), + + SpatialCrossMapLRN(5), + nn.MaxPool2d(3, stride=2, ceil_mode=True), + ) + + self.a3 = Inception(192, 64, 96, 128, 16, 32, 32) + self.b3 = Inception(256, 128, 128, 192, 32, 96, 64) + + self.maxpool = nn.MaxPool2d(3, stride=2, ceil_mode=True) + + self.a4 = Inception(480, 192, 96, 208, 16, 48, 64) + self.b4 = Inception(512, 160, 112, 224, 24, 64, 64) + self.c4 = Inception(512, 128, 128, 256, 24, 64, 64) + self.d4 = Inception(512, 112, 144, 288, 32, 64, 64) + self.e4 = Inception(528, 256, 160, 320, 32, 128, 128) + + def forward(self, x): + out = self.pre_layers(x) + out = self.a3(out) + out = self.b3(out) + out = self.maxpool(out) + out = self.a4(out) + out = self.b4(out) + out = self.c4(out) + out = self.d4(out) + out = self.e4(out) + + return out + + +class Model(nn.Module): + def __init__(self, n_parts=8): + super(Model, self).__init__() + self.n_parts = n_parts + + self.feat_conv = GoogLeNet() + self.conv_input_feat = nn.Conv2d(self.feat_conv.output_channels, 512, 1) + + # part net + self.conv_att = nn.Conv2d(512, self.n_parts, 1) + + for i in range(self.n_parts): + setattr(self, 'linear_feature{}'.format(i+1), nn.Linear(512, 64)) + + def forward(self, x): + feature = self.feat_conv(x) + feature = self.conv_input_feat(feature) + + att_weights = torch.sigmoid(self.conv_att(feature)) + + linear_feautres = [] + for i in range(self.n_parts): + masked_feature = feature * torch.unsqueeze(att_weights[:, i], 1) + pooled_feature = F.avg_pool2d(masked_feature, masked_feature.size()[2:4]) + linear_feautres.append( + getattr(self, 'linear_feature{}'.format(i+1))(pooled_feature.view(pooled_feature.size(0), -1)) + ) + + concat_features = torch.cat(linear_feautres, 1) + normed_feature = concat_features / torch.clamp(torch.norm(concat_features, 2, 1, keepdim=True), min=1e-6) + + return normed_feature + + +def load_reid_model(ckpt): + model = Model(n_parts=8) + model.inp_size = (80, 160) + load_net(ckpt, model) + print('Load ReID model from {}'.format(ckpt)) + + model = model.cuda() + model.eval() + return model + + +def im_preprocess(image): + image = np.asarray(image, np.float32) + image -= np.array([104, 117, 123], dtype=np.float32).reshape(1, 1, -1) + image = image.transpose((2, 0, 1)) + return image + + +def extract_image_patches(image, bboxes): + bboxes = 
np.round(bboxes).astype(np.int) + bboxes = clip_boxes(bboxes, image.shape) + patches = [image[box[1]:box[3], box[0]:box[2]] for box in bboxes] + return patches + + +def extract_reid_features(reid_model, image, tlbrs): + if len(tlbrs) == 0: + return torch.FloatTensor() + + patches = extract_image_patches(image, tlbrs) + patches = np.asarray([im_preprocess(cv2.resize(p, reid_model.inp_size)) for p in patches], dtype=np.float32) + + with torch.no_grad(): + im_var = Variable(torch.from_numpy(patches)) + im_var = im_var.cuda() + features = reid_model(im_var).data + return features \ No newline at end of file diff --git a/yolox/sort_tracker/sort.py b/yolox/sort_tracker/sort.py new file mode 100644 index 0000000000000000000000000000000000000000..9b708ba85f392f1097fec6aaa916ca571b09e3df --- /dev/null +++ b/yolox/sort_tracker/sort.py @@ -0,0 +1,251 @@ +""" + SORT: A Simple, Online and Realtime Tracker + Copyright (C) 2016-2020 Alex Bewley alex@bewley.ai + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + You should have received a copy of the GNU General Public License + along with this program. If not, see . +""" +from __future__ import print_function + +import os +import numpy as np + +from filterpy.kalman import KalmanFilter + +np.random.seed(0) + + +def linear_assignment(cost_matrix): + try: + import lap + _, x, y = lap.lapjv(cost_matrix, extend_cost=True) + return np.array([[y[i],i] for i in x if i >= 0]) # + except ImportError: + from scipy.optimize import linear_sum_assignment + x, y = linear_sum_assignment(cost_matrix) + return np.array(list(zip(x, y))) + + +def iou_batch(bb_test, bb_gt): + """ + From SORT: Computes IOU between two bboxes in the form [x1,y1,x2,y2] + """ + bb_gt = np.expand_dims(bb_gt, 0) + bb_test = np.expand_dims(bb_test, 1) + + xx1 = np.maximum(bb_test[..., 0], bb_gt[..., 0]) + yy1 = np.maximum(bb_test[..., 1], bb_gt[..., 1]) + xx2 = np.minimum(bb_test[..., 2], bb_gt[..., 2]) + yy2 = np.minimum(bb_test[..., 3], bb_gt[..., 3]) + w = np.maximum(0., xx2 - xx1) + h = np.maximum(0., yy2 - yy1) + wh = w * h + o = wh / ((bb_test[..., 2] - bb_test[..., 0]) * (bb_test[..., 3] - bb_test[..., 1]) + + (bb_gt[..., 2] - bb_gt[..., 0]) * (bb_gt[..., 3] - bb_gt[..., 1]) - wh) + return(o) + + +def convert_bbox_to_z(bbox): + """ + Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form + [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is + the aspect ratio + """ + w = bbox[2] - bbox[0] + h = bbox[3] - bbox[1] + x = bbox[0] + w/2. + y = bbox[1] + h/2. 
+ s = w * h #scale is just area + r = w / float(h) + return np.array([x, y, s, r]).reshape((4, 1)) + + +def convert_x_to_bbox(x,score=None): + """ + Takes a bounding box in the centre form [x,y,s,r] and returns it in the form + [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right + """ + w = np.sqrt(x[2] * x[3]) + h = x[2] / w + if(score==None): + return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.]).reshape((1,4)) + else: + return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.,score]).reshape((1,5)) + + +class KalmanBoxTracker(object): + """ + This class represents the internal state of individual tracked objects observed as bbox. + """ + count = 0 + def __init__(self,bbox): + """ + Initialises a tracker using initial bounding box. + """ + #define constant velocity model + self.kf = KalmanFilter(dim_x=7, dim_z=4) + self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0], [0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]]) + self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]]) + + self.kf.R[2:,2:] *= 10. + self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities + self.kf.P *= 10. + self.kf.Q[-1,-1] *= 0.01 + self.kf.Q[4:,4:] *= 0.01 + + self.kf.x[:4] = convert_bbox_to_z(bbox) + self.time_since_update = 0 + self.id = KalmanBoxTracker.count + KalmanBoxTracker.count += 1 + self.history = [] + self.hits = 0 + self.hit_streak = 0 + self.age = 0 + + def update(self,bbox): + """ + Updates the state vector with observed bbox. + """ + self.time_since_update = 0 + self.history = [] + self.hits += 1 + self.hit_streak += 1 + self.kf.update(convert_bbox_to_z(bbox)) + + def predict(self): + """ + Advances the state vector and returns the predicted bounding box estimate. + """ + if((self.kf.x[6]+self.kf.x[2])<=0): + self.kf.x[6] *= 0.0 + self.kf.predict() + self.age += 1 + if(self.time_since_update>0): + self.hit_streak = 0 + self.time_since_update += 1 + self.history.append(convert_x_to_bbox(self.kf.x)) + return self.history[-1] + + def get_state(self): + """ + Returns the current bounding box estimate. + """ + return convert_x_to_bbox(self.kf.x) + + +def associate_detections_to_trackers(detections,trackers,iou_threshold = 0.3): + """ + Assigns detections to tracked object (both represented as bounding boxes) + Returns 3 lists of matches, unmatched_detections and unmatched_trackers + """ + if(len(trackers)==0): + return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int) + + iou_matrix = iou_batch(detections, trackers) + + if min(iou_matrix.shape) > 0: + a = (iou_matrix > iou_threshold).astype(np.int32) + if a.sum(1).max() == 1 and a.sum(0).max() == 1: + matched_indices = np.stack(np.where(a), axis=1) + else: + matched_indices = linear_assignment(-iou_matrix) + else: + matched_indices = np.empty(shape=(0,2)) + + unmatched_detections = [] + for d, det in enumerate(detections): + if(d not in matched_indices[:,0]): + unmatched_detections.append(d) + unmatched_trackers = [] + for t, trk in enumerate(trackers): + if(t not in matched_indices[:,1]): + unmatched_trackers.append(t) + + #filter out matched with low IOU + matches = [] + for m in matched_indices: + if(iou_matrix[m[0], m[1]]<iou_threshold): + unmatched_detections.append(m[0]) + unmatched_trackers.append(m[1]) + else: + matches.append(m.reshape(1,2)) + if(len(matches)==0): + matches = np.empty((0,2),dtype=int) + else: + matches = np.concatenate(matches,axis=0) + + return matches, np.array(unmatched_detections), np.array(unmatched_trackers) + + +class Sort(object): + def __init__(self, det_thresh, max_age=30, min_hits=3, iou_threshold=0.3): + """ + Sets key parameters for SORT + """ + self.max_age = max_age + self.min_hits = min_hits + self.iou_threshold = iou_threshold + self.trackers = [] + self.frame_count = 0 + self.det_thresh = det_thresh + + def update(self, output_results, img_info, img_size): + """ + Params: + output_results - detector output; detections are recovered as [[x1,y1,x2,y2,score],...] + Requires: this method must be called once for each frame even with empty detections. + Returns a similar array, where the last column is the object ID. + NOTE: The number of objects returned may differ from the number of detections provided. + """ + self.frame_count += 1 + # post_process detections + output_results = output_results.cpu().numpy() + scores = output_results[:, 4] * output_results[:, 5] + bboxes = output_results[:, :4] # x1y1x2y2 + img_h, img_w = img_info[0], img_info[1] + scale = min(img_size[0] / float(img_h), img_size[1] / float(img_w)) + bboxes /= scale + dets = np.concatenate((bboxes, np.expand_dims(scores, axis=-1)), axis=1) + remain_inds = scores > self.det_thresh + dets = dets[remain_inds] + # get predicted locations from existing trackers. 
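+ # Each surviving tracker is advanced one frame by its Kalman prediction below; the predicted
+ # boxes are matched to the thresholded detections by IoU, matched trackers are corrected with
+ # their detection, and unmatched detections spawn new KalmanBoxTracker instances.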
+ trks = np.zeros((len(self.trackers), 5)) + to_del = [] + ret = [] + for t, trk in enumerate(trks): + pos = self.trackers[t].predict()[0] + trk[:] = [pos[0], pos[1], pos[2], pos[3], 0] + if np.any(np.isnan(pos)): + to_del.append(t) + trks = np.ma.compress_rows(np.ma.masked_invalid(trks)) + for t in reversed(to_del): + self.trackers.pop(t) + matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets, trks, self.iou_threshold) + + # update matched trackers with assigned detections + for m in matched: + self.trackers[m[1]].update(dets[m[0], :]) + + # create and initialise new trackers for unmatched detections + for i in unmatched_dets: + trk = KalmanBoxTracker(dets[i,:]) + self.trackers.append(trk) + i = len(self.trackers) + for trk in reversed(self.trackers): + d = trk.get_state()[0] + if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits): + ret.append(np.concatenate((d,[trk.id+1])).reshape(1,-1)) # +1 as MOT benchmark requires positive + i -= 1 + # remove dead tracklet + if(trk.time_since_update > self.max_age): + self.trackers.pop(i) + if(len(ret)>0): + return np.concatenate(ret) + return np.empty((0,5)) \ No newline at end of file diff --git a/yolox/tracker/basetrack.py b/yolox/tracker/basetrack.py new file mode 100644 index 0000000000000000000000000000000000000000..a7130b5cc08ac55705c155594d0f2a1d09f96774 --- /dev/null +++ b/yolox/tracker/basetrack.py @@ -0,0 +1,52 @@ +import numpy as np +from collections import OrderedDict + + +class TrackState(object): + New = 0 + Tracked = 1 + Lost = 2 + Removed = 3 + + +class BaseTrack(object): + _count = 0 + + track_id = 0 + is_activated = False + state = TrackState.New + + history = OrderedDict() + features = [] + curr_feature = None + score = 0 + start_frame = 0 + frame_id = 0 + time_since_update = 0 + + # multi-camera + location = (np.inf, np.inf) + + @property + def end_frame(self): + return self.frame_id + + @staticmethod + def next_id(): + BaseTrack._count += 1 + return BaseTrack._count + + def activate(self, *args): + raise NotImplementedError + + def predict(self): + raise NotImplementedError + + def update(self, *args, **kwargs): + raise NotImplementedError + + def mark_lost(self): + self.state = TrackState.Lost + + def mark_removed(self): + self.state = TrackState.Removed \ No newline at end of file diff --git a/yolox/tracker/byte_tracker.py b/yolox/tracker/byte_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..2d004599bba96ff4ba5fc1e9ad943e64361067e3 --- /dev/null +++ b/yolox/tracker/byte_tracker.py @@ -0,0 +1,330 @@ +import numpy as np +from collections import deque +import os +import os.path as osp +import copy +import torch +import torch.nn.functional as F + +from .kalman_filter import KalmanFilter +from yolox.tracker import matching +from .basetrack import BaseTrack, TrackState + +class STrack(BaseTrack): + shared_kalman = KalmanFilter() + def __init__(self, tlwh, score): + + # wait activate + self._tlwh = np.asarray(tlwh, dtype=np.float) + self.kalman_filter = None + self.mean, self.covariance = None, None + self.is_activated = False + + self.score = score + self.tracklet_len = 0 + + def predict(self): + mean_state = self.mean.copy() + if self.state != TrackState.Tracked: + mean_state[7] = 0 + self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) + + @staticmethod + def multi_predict(stracks): + if len(stracks) > 0: + multi_mean = np.asarray([st.mean.copy() for st in stracks]) + multi_covariance = 
np.asarray([st.covariance for st in stracks]) + for i, st in enumerate(stracks): + if st.state != TrackState.Tracked: + multi_mean[i][7] = 0 + multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance) + for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): + stracks[i].mean = mean + stracks[i].covariance = cov + + def activate(self, kalman_filter, frame_id): + """Start a new tracklet""" + self.kalman_filter = kalman_filter + self.track_id = self.next_id() + self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) + + self.tracklet_len = 0 + self.state = TrackState.Tracked + if frame_id == 1: + self.is_activated = True + # self.is_activated = True + self.frame_id = frame_id + self.start_frame = frame_id + + def re_activate(self, new_track, frame_id, new_id=False): + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) + ) + self.tracklet_len = 0 + self.state = TrackState.Tracked + self.is_activated = True + self.frame_id = frame_id + if new_id: + self.track_id = self.next_id() + self.score = new_track.score + + def update(self, new_track, frame_id): + """ + Update a matched track + :type new_track: STrack + :type frame_id: int + :type update_feature: bool + :return: + """ + self.frame_id = frame_id + self.tracklet_len += 1 + + new_tlwh = new_track.tlwh + self.mean, self.covariance = self.kalman_filter.update( + self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) + self.state = TrackState.Tracked + self.is_activated = True + + self.score = new_track.score + + @property + # @jit(nopython=True) + def tlwh(self): + """Get current position in bounding box format `(top left x, top left y, + width, height)`. + """ + if self.mean is None: + return self._tlwh.copy() + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + @property + # @jit(nopython=True) + def tlbr(self): + """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_xyah(tlwh): + """Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. 
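+ For example, tlwh (10, 20, 50, 100) maps to xyah (35.0, 70.0, 0.5, 100.0).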
+ """ + ret = np.asarray(tlwh).copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + def to_xyah(self): + return self.tlwh_to_xyah(self.tlwh) + + @staticmethod + # @jit(nopython=True) + def tlbr_to_tlwh(tlbr): + ret = np.asarray(tlbr).copy() + ret[2:] -= ret[:2] + return ret + + @staticmethod + # @jit(nopython=True) + def tlwh_to_tlbr(tlwh): + ret = np.asarray(tlwh).copy() + ret[2:] += ret[:2] + return ret + + def __repr__(self): + return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) + + +class BYTETracker(object): + def __init__(self, args, frame_rate=30): + self.tracked_stracks = [] # type: list[STrack] + self.lost_stracks = [] # type: list[STrack] + self.removed_stracks = [] # type: list[STrack] + + self.frame_id = 0 + self.args = args + #self.det_thresh = args.track_thresh + self.det_thresh = args.track_thresh + 0.1 + self.buffer_size = int(frame_rate / 30.0 * args.track_buffer) + self.max_time_lost = self.buffer_size + self.kalman_filter = KalmanFilter() + + def update(self, output_results, img_info, img_size): + self.frame_id += 1 + activated_starcks = [] + refind_stracks = [] + lost_stracks = [] + removed_stracks = [] + + if output_results.shape[1] == 5: + scores = output_results[:, 4] + bboxes = output_results[:, :4] + else: + output_results = output_results.cpu().numpy() + scores = output_results[:, 4] * output_results[:, 5] + bboxes = output_results[:, :4] # x1y1x2y2 + img_h, img_w = img_info[0], img_info[1] + scale = min(img_size[0] / float(img_h), img_size[1] / float(img_w)) + bboxes /= scale + + remain_inds = scores > self.args.track_thresh + inds_low = scores > 0.1 + inds_high = scores < self.args.track_thresh + + inds_second = np.logical_and(inds_low, inds_high) + dets_second = bboxes[inds_second] + dets = bboxes[remain_inds] + scores_keep = scores[remain_inds] + scores_second = scores[inds_second] + + if len(dets) > 0: + '''Detections''' + detections = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for + (tlbr, s) in zip(dets, scores_keep)] + else: + detections = [] + + ''' Add newly detected tracklets to tracked_stracks''' + unconfirmed = [] + tracked_stracks = [] # type: list[STrack] + for track in self.tracked_stracks: + if not track.is_activated: + unconfirmed.append(track) + else: + tracked_stracks.append(track) + + ''' Step 2: First association, with high score detection boxes''' + strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) + # Predict the current location with KF + STrack.multi_predict(strack_pool) + dists = matching.iou_distance(strack_pool, detections) + if not self.args.mot20: + dists = matching.fuse_score(dists, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.args.match_thresh) + + for itracked, idet in matches: + track = strack_pool[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(detections[idet], self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + ''' Step 3: Second association, with low score detection boxes''' + # association the untrack to the low score detections + if len(dets_second) > 0: + '''Detections''' + detections_second = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for + (tlbr, s) in zip(dets_second, scores_second)] + else: + detections_second = [] + r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked] + dists = matching.iou_distance(r_tracked_stracks, detections_second) + matches, 
u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5) + for itracked, idet in matches: + track = r_tracked_stracks[itracked] + det = detections_second[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + for it in u_track: + track = r_tracked_stracks[it] + if not track.state == TrackState.Lost: + track.mark_lost() + lost_stracks.append(track) + + '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' + detections = [detections[i] for i in u_detection] + dists = matching.iou_distance(unconfirmed, detections) + if not self.args.mot20: + dists = matching.fuse_score(dists, detections) + matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7) + for itracked, idet in matches: + unconfirmed[itracked].update(detections[idet], self.frame_id) + activated_starcks.append(unconfirmed[itracked]) + for it in u_unconfirmed: + track = unconfirmed[it] + track.mark_removed() + removed_stracks.append(track) + + """ Step 4: Init new stracks""" + for inew in u_detection: + track = detections[inew] + if track.score < self.det_thresh: + continue + track.activate(self.kalman_filter, self.frame_id) + activated_starcks.append(track) + """ Step 5: Update state""" + for track in self.lost_stracks: + if self.frame_id - track.end_frame > self.max_time_lost: + track.mark_removed() + removed_stracks.append(track) + + # print('Ramained match {} s'.format(t4-t3)) + + self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] + self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) + self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) + self.lost_stracks.extend(lost_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) + self.removed_stracks.extend(removed_stracks) + self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) + # get scores of lost tracks + output_stracks = [track for track in self.tracked_stracks if track.is_activated] + + return output_stracks + + +def joint_stracks(tlista, tlistb): + exists = {} + res = [] + for t in tlista: + exists[t.track_id] = 1 + res.append(t) + for t in tlistb: + tid = t.track_id + if not exists.get(tid, 0): + exists[tid] = 1 + res.append(t) + return res + + +def sub_stracks(tlista, tlistb): + stracks = {} + for t in tlista: + stracks[t.track_id] = t + for t in tlistb: + tid = t.track_id + if stracks.get(tid, 0): + del stracks[tid] + return list(stracks.values()) + + +def remove_duplicate_stracks(stracksa, stracksb): + pdist = matching.iou_distance(stracksa, stracksb) + pairs = np.where(pdist < 0.15) + dupa, dupb = list(), list() + for p, q in zip(*pairs): + timep = stracksa[p].frame_id - stracksa[p].start_frame + timeq = stracksb[q].frame_id - stracksb[q].start_frame + if timep > timeq: + dupb.append(q) + else: + dupa.append(p) + resa = [t for i, t in enumerate(stracksa) if not i in dupa] + resb = [t for i, t in enumerate(stracksb) if not i in dupb] + return resa, resb diff --git a/yolox/tracker/kalman_filter.py b/yolox/tracker/kalman_filter.py new file mode 100644 index 0000000000000000000000000000000000000000..deda8a26292b81bc6512a8f6145afabde6c16d7a --- /dev/null +++ b/yolox/tracker/kalman_filter.py @@ -0,0 +1,270 @@ +# 
vim: expandtab:ts=4:sw=4 +import numpy as np +import scipy.linalg + + +""" +Table for the 0.95 quantile of the chi-square distribution with N degrees of +freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv +function and used as Mahalanobis gating threshold. +""" +chi2inv95 = { + 1: 3.8415, + 2: 5.9915, + 3: 7.8147, + 4: 9.4877, + 5: 11.070, + 6: 12.592, + 7: 14.067, + 8: 15.507, + 9: 16.919} + + +class KalmanFilter(object): + """ + A simple Kalman filter for tracking bounding boxes in image space. + + The 8-dimensional state space + + x, y, a, h, vx, vy, va, vh + + contains the bounding box center position (x, y), aspect ratio a, height h, + and their respective velocities. + + Object motion follows a constant velocity model. The bounding box location + (x, y, a, h) is taken as direct observation of the state space (linear + observation model). + + """ + + def __init__(self): + ndim, dt = 4, 1. + + # Create Kalman filter model matrices. + self._motion_mat = np.eye(2 * ndim, 2 * ndim) + for i in range(ndim): + self._motion_mat[i, ndim + i] = dt + self._update_mat = np.eye(ndim, 2 * ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. These weights control the amount of uncertainty in + # the model. This is a bit hacky. + self._std_weight_position = 1. / 20 + self._std_weight_velocity = 1. / 160 + + def initiate(self, measurement): + """Create track from unassociated measurement. + + Parameters + ---------- + measurement : ndarray + Bounding box coordinates (x, y, a, h) with center position (x, y), + aspect ratio a, and height h. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are initialized + to 0 mean. + + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + mean = np.r_[mean_pos, mean_vel] + + std = [ + 2 * self._std_weight_position * measurement[3], + 2 * self._std_weight_position * measurement[3], + 1e-2, + 2 * self._std_weight_position * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[3], + 1e-5, + 10 * self._std_weight_velocity * measurement[3]] + covariance = np.diag(np.square(std)) + return mean, covariance + + def predict(self, mean, covariance): + """Run Kalman filter prediction step. + + Parameters + ---------- + mean : ndarray + The 8 dimensional mean vector of the object state at the previous + time step. + covariance : ndarray + The 8x8 dimensional covariance matrix of the object state at the + previous time step. + + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + + """ + std_pos = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-2, + self._std_weight_position * mean[3]] + std_vel = [ + self._std_weight_velocity * mean[3], + self._std_weight_velocity * mean[3], + 1e-5, + self._std_weight_velocity * mean[3]] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + #mean = np.dot(self._motion_mat, mean) + mean = np.dot(mean, self._motion_mat.T) + covariance = np.linalg.multi_dot(( + self._motion_mat, covariance, self._motion_mat.T)) + motion_cov + + return mean, covariance + + def project(self, mean, covariance): + """Project state distribution to measurement space. 
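+ Concretely, the state mean is mapped through the 4x8 measurement matrix and the measurement noise covariance (innovation_cov) is added to the projected covariance.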
+ + Parameters + ---------- + mean : ndarray + The state's mean vector (8 dimensional array). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + + Returns + ------- + (ndarray, ndarray) + Returns the projected mean and covariance matrix of the given state + estimate. + + """ + std = [ + self._std_weight_position * mean[3], + self._std_weight_position * mean[3], + 1e-1, + self._std_weight_position * mean[3]] + innovation_cov = np.diag(np.square(std)) + + mean = np.dot(self._update_mat, mean) + covariance = np.linalg.multi_dot(( + self._update_mat, covariance, self._update_mat.T)) + return mean, covariance + innovation_cov + + def multi_predict(self, mean, covariance): + """Run Kalman filter prediction step (Vectorized version). + Parameters + ---------- + mean : ndarray + The Nx8 dimensional mean matrix of the object states at the previous + time step. + covariance : ndarray + The Nx8x8 dimensional covariance matrics of the object states at the + previous time step. + Returns + ------- + (ndarray, ndarray) + Returns the mean vector and covariance matrix of the predicted + state. Unobserved velocities are initialized to 0 mean. + """ + std_pos = [ + self._std_weight_position * mean[:, 3], + self._std_weight_position * mean[:, 3], + 1e-2 * np.ones_like(mean[:, 3]), + self._std_weight_position * mean[:, 3]] + std_vel = [ + self._std_weight_velocity * mean[:, 3], + self._std_weight_velocity * mean[:, 3], + 1e-5 * np.ones_like(mean[:, 3]), + self._std_weight_velocity * mean[:, 3]] + sqr = np.square(np.r_[std_pos, std_vel]).T + + motion_cov = [] + for i in range(len(mean)): + motion_cov.append(np.diag(sqr[i])) + motion_cov = np.asarray(motion_cov) + + mean = np.dot(mean, self._motion_mat.T) + left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) + covariance = np.dot(left, self._motion_mat.T) + motion_cov + + return mean, covariance + + def update(self, mean, covariance, measurement): + """Run Kalman filter correction step. + + Parameters + ---------- + mean : ndarray + The predicted state's mean vector (8 dimensional). + covariance : ndarray + The state's covariance matrix (8x8 dimensional). + measurement : ndarray + The 4 dimensional measurement vector (x, y, a, h), where (x, y) + is the center position, a the aspect ratio, and h the height of the + bounding box. + + Returns + ------- + (ndarray, ndarray) + Returns the measurement-corrected state distribution. + + """ + projected_mean, projected_cov = self.project(mean, covariance) + + chol_factor, lower = scipy.linalg.cho_factor( + projected_cov, lower=True, check_finite=False) + kalman_gain = scipy.linalg.cho_solve( + (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, + check_finite=False).T + innovation = measurement - projected_mean + + new_mean = mean + np.dot(innovation, kalman_gain.T) + new_covariance = covariance - np.linalg.multi_dot(( + kalman_gain, projected_cov, kalman_gain.T)) + return new_mean, new_covariance + + def gating_distance(self, mean, covariance, measurements, + only_position=False, metric='maha'): + """Compute gating distance between state distribution and measurements. + A suitable distance threshold can be obtained from `chi2inv95`. If + `only_position` is False, the chi-square distribution has 4 degrees of + freedom, otherwise 2. + Parameters + ---------- + mean : ndarray + Mean vector over the state distribution (8 dimensional). + covariance : ndarray + Covariance of the state distribution (8x8 dimensional). 
+ measurements : ndarray + An Nx4 dimensional matrix of N measurements, each in + format (x, y, a, h) where (x, y) is the bounding box center + position, a the aspect ratio, and h the height. + only_position : Optional[bool] + If True, distance computation is done with respect to the bounding + box center position only. + Returns + ------- + ndarray + Returns an array of length N, where the i-th element contains the + squared Mahalanobis distance between (mean, covariance) and + `measurements[i]`. + """ + mean, covariance = self.project(mean, covariance) + if only_position: + mean, covariance = mean[:2], covariance[:2, :2] + measurements = measurements[:, :2] + + d = measurements - mean + if metric == 'gaussian': + return np.sum(d * d, axis=1) + elif metric == 'maha': + cholesky_factor = np.linalg.cholesky(covariance) + z = scipy.linalg.solve_triangular( + cholesky_factor, d.T, lower=True, check_finite=False, + overwrite_b=True) + squared_maha = np.sum(z * z, axis=0) + return squared_maha + else: + raise ValueError('invalid distance metric') \ No newline at end of file diff --git a/yolox/tracker/matching.py b/yolox/tracker/matching.py new file mode 100644 index 0000000000000000000000000000000000000000..d36a6cf5bf758a49bd414f63f402fef3fdd2e18c --- /dev/null +++ b/yolox/tracker/matching.py @@ -0,0 +1,181 @@ +import cv2 +import numpy as np +import scipy +import lap +from scipy.spatial.distance import cdist + +from cython_bbox import bbox_overlaps as bbox_ious +from yolox.tracker import kalman_filter +import time + +def merge_matches(m1, m2, shape): + O,P,Q = shape + m1 = np.asarray(m1) + m2 = np.asarray(m2) + + M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) + M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) + + mask = M1*M2 + match = mask.nonzero() + match = list(zip(match[0], match[1])) + unmatched_O = tuple(set(range(O)) - set([i for i, j in match])) + unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match])) + + return match, unmatched_O, unmatched_Q + + +def _indices_to_matches(cost_matrix, indices, thresh): + matched_cost = cost_matrix[tuple(zip(*indices))] + matched_mask = (matched_cost <= thresh) + + matches = indices[matched_mask] + unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0])) + unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1])) + + return matches, unmatched_a, unmatched_b + + +def linear_assignment(cost_matrix, thresh): + if cost_matrix.size == 0: + return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) + matches, unmatched_a, unmatched_b = [], [], [] + cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) + for ix, mx in enumerate(x): + if mx >= 0: + matches.append([ix, mx]) + unmatched_a = np.where(x < 0)[0] + unmatched_b = np.where(y < 0)[0] + matches = np.asarray(matches) + return matches, unmatched_a, unmatched_b + + +def ious(atlbrs, btlbrs): + """ + Compute cost based on IoU + :type atlbrs: list[tlbr] | np.ndarray + :type atlbrs: list[tlbr] | np.ndarray + + :rtype ious np.ndarray + """ + ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) + if ious.size == 0: + return ious + + ious = bbox_ious( + np.ascontiguousarray(atlbrs, dtype=np.float), + np.ascontiguousarray(btlbrs, dtype=np.float) + ) + + return ious + + +def iou_distance(atracks, btracks): + """ + Compute cost based on IoU + :type atracks: list[STrack] + :type btracks: list[STrack] + + :rtype cost_matrix 
np.ndarray + """ + + if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): + atlbrs = atracks + btlbrs = btracks + else: + atlbrs = [track.tlbr for track in atracks] + btlbrs = [track.tlbr for track in btracks] + _ious = ious(atlbrs, btlbrs) + cost_matrix = 1 - _ious + + return cost_matrix + +def v_iou_distance(atracks, btracks): + """ + Compute cost based on IoU + :type atracks: list[STrack] + :type btracks: list[STrack] + + :rtype cost_matrix np.ndarray + """ + + if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)): + atlbrs = atracks + btlbrs = btracks + else: + atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks] + btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks] + _ious = ious(atlbrs, btlbrs) + cost_matrix = 1 - _ious + + return cost_matrix + +def embedding_distance(tracks, detections, metric='cosine'): + """ + :param tracks: list[STrack] + :param detections: list[BaseTrack] + :param metric: + :return: cost_matrix np.ndarray + """ + + cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) + if cost_matrix.size == 0: + return cost_matrix + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + #for i, track in enumerate(tracks): + #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + return cost_matrix + + +def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = kalman_filter.chi2inv95[gating_dim] + measurements = np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position) + cost_matrix[row, gating_distance > gating_threshold] = np.inf + return cost_matrix + + +def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = kalman_filter.chi2inv95[gating_dim] + measurements = np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance( + track.mean, track.covariance, measurements, only_position, metric='maha') + cost_matrix[row, gating_distance > gating_threshold] = np.inf + cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance + return cost_matrix + + +def fuse_iou(cost_matrix, tracks, detections): + if cost_matrix.size == 0: + return cost_matrix + reid_sim = 1 - cost_matrix + iou_dist = iou_distance(tracks, detections) + iou_sim = 1 - iou_dist + fuse_sim = reid_sim * (1 + iou_sim) / 2 + det_scores = np.array([det.score for det in detections]) + det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0) + #fuse_sim = fuse_sim * (1 + det_scores) / 2 + fuse_cost = 1 - fuse_sim + return fuse_cost + + +def fuse_score(cost_matrix, detections): + if cost_matrix.size == 0: + return cost_matrix + iou_sim = 1 - cost_matrix + det_scores = np.array([det.score for det in detections]) + det_scores = np.expand_dims(det_scores, 
axis=0).repeat(cost_matrix.shape[0], axis=0) + fuse_sim = iou_sim * det_scores + fuse_cost = 1 - fuse_sim + return fuse_cost \ No newline at end of file diff --git a/yolox/tracking_utils/evaluation.py b/yolox/tracking_utils/evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..6e2f063e43a529da934e897390bfa0daf8c19610 --- /dev/null +++ b/yolox/tracking_utils/evaluation.py @@ -0,0 +1,113 @@ +import os +import numpy as np +import copy +import motmetrics as mm +mm.lap.default_solver = 'lap' + +from yolox.tracking_utils.io import read_results, unzip_objs + + +class Evaluator(object): + + def __init__(self, data_root, seq_name, data_type): + self.data_root = data_root + self.seq_name = seq_name + self.data_type = data_type + + self.load_annotations() + self.reset_accumulator() + + def load_annotations(self): + assert self.data_type == 'mot' + + gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt') + self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True) + self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) + + def reset_accumulator(self): + self.acc = mm.MOTAccumulator(auto_id=True) + + def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): + # results + trk_tlwhs = np.copy(trk_tlwhs) + trk_ids = np.copy(trk_ids) + + # gts + gt_objs = self.gt_frame_dict.get(frame_id, []) + gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] + + # ignore boxes + ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) + ignore_tlwhs = unzip_objs(ignore_objs)[0] + + # remove ignored results + keep = np.ones(len(trk_tlwhs), dtype=bool) + iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) + if len(iou_distance) > 0: + match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) + match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) + match_ious = iou_distance[match_is, match_js] + + match_js = np.asarray(match_js, dtype=int) + match_js = match_js[np.logical_not(np.isnan(match_ious))] + keep[match_js] = False + trk_tlwhs = trk_tlwhs[keep] + trk_ids = trk_ids[keep] + #match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) + #match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) + #match_ious = iou_distance[match_is, match_js] + + #match_js = np.asarray(match_js, dtype=int) + #match_js = match_js[np.logical_not(np.isnan(match_ious))] + #keep[match_js] = False + #trk_tlwhs = trk_tlwhs[keep] + #trk_ids = trk_ids[keep] + + # get distance matrix + iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) + + # acc + self.acc.update(gt_ids, trk_ids, iou_distance) + + if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): + events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics + else: + events = None + return events + + def eval_file(self, filename): + self.reset_accumulator() + + result_frame_dict = read_results(filename, self.data_type, is_gt=False) + #frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) + frames = sorted(list(set(result_frame_dict.keys()))) + for frame_id in frames: + trk_objs = result_frame_dict.get(frame_id, []) + trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] + self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) + + return self.acc + + @staticmethod + def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): + names = 
copy.deepcopy(names) + if metrics is None: + metrics = mm.metrics.motchallenge_metrics + metrics = copy.deepcopy(metrics) + + mh = mm.metrics.create() + summary = mh.compute_many( + accs, + metrics=metrics, + names=names, + generate_overall=True + ) + + return summary + + @staticmethod + def save_summary(summary, filename): + import pandas as pd + writer = pd.ExcelWriter(filename) + summary.to_excel(writer) + writer.save() \ No newline at end of file diff --git a/yolox/tracking_utils/io.py b/yolox/tracking_utils/io.py new file mode 100644 index 0000000000000000000000000000000000000000..5c8b053c9bc868d645b7ce0bec057879ba51f2d8 --- /dev/null +++ b/yolox/tracking_utils/io.py @@ -0,0 +1,116 @@ +import os +from typing import Dict +import numpy as np + + +def write_results(filename, results_dict: Dict, data_type: str): + if not filename: + return + path = os.path.dirname(filename) + if not os.path.exists(path): + os.makedirs(path) + + if data_type in ('mot', 'mcmot', 'lab'): + save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' + elif data_type == 'kitti': + save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' + else: + raise ValueError(data_type) + + with open(filename, 'w') as f: + for frame_id, frame_data in results_dict.items(): + if data_type == 'kitti': + frame_id -= 1 + for tlwh, track_id in frame_data: + if track_id < 0: + continue + x1, y1, w, h = tlwh + x2, y2 = x1 + w, y1 + h + line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0) + f.write(line) + + +def read_results(filename, data_type: str, is_gt=False, is_ignore=False): + if data_type in ('mot', 'lab'): + read_fun = read_mot_results + else: + raise ValueError('Unknown data type: {}'.format(data_type)) + + return read_fun(filename, is_gt, is_ignore) + + +""" +labels={'ped', ... % 1 +'person_on_vhcl', ... % 2 +'car', ... % 3 +'bicycle', ... % 4 +'mbike', ... % 5 +'non_mot_vhcl', ... % 6 +'static_person', ... % 7 +'distractor', ... % 8 +'occluder', ... % 9 +'occluder_on_grnd', ... %10 +'occluder_full', ... % 11 +'reflection', ... % 12 +'crowd' ... 
% 13 +}; +""" + + +def read_mot_results(filename, is_gt, is_ignore): + valid_labels = {1} + ignore_labels = {2, 7, 8, 12} + results_dict = dict() + if os.path.isfile(filename): + with open(filename, 'r') as f: + for line in f.readlines(): + linelist = line.split(',') + if len(linelist) < 7: + continue + fid = int(linelist[0]) + if fid < 1: + continue + results_dict.setdefault(fid, list()) + + box_size = float(linelist[4]) * float(linelist[5]) + + if is_gt: + if 'MOT16-' in filename or 'MOT17-' in filename: + label = int(float(linelist[7])) + mark = int(float(linelist[6])) + if mark == 0 or label not in valid_labels: + continue + score = 1 + elif is_ignore: + if 'MOT16-' in filename or 'MOT17-' in filename: + label = int(float(linelist[7])) + vis_ratio = float(linelist[8]) + if label not in ignore_labels and vis_ratio >= 0: + continue + else: + continue + score = 1 + else: + score = float(linelist[6]) + + #if box_size > 7000: + #if box_size <= 7000 or box_size >= 15000: + #if box_size < 15000: + #continue + + tlwh = tuple(map(float, linelist[2:6])) + target_id = int(linelist[1]) + + results_dict[fid].append((tlwh, target_id, score)) + + return results_dict + + +def unzip_objs(objs): + if len(objs) > 0: + tlwhs, ids, scores = zip(*objs) + else: + tlwhs, ids, scores = [], [], [] + tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) + + return tlwhs, ids, scores \ No newline at end of file diff --git a/yolox/tracking_utils/timer.py b/yolox/tracking_utils/timer.py new file mode 100644 index 0000000000000000000000000000000000000000..c9b15fb969bce7b31a1613a6401141dcc9cf180a --- /dev/null +++ b/yolox/tracking_utils/timer.py @@ -0,0 +1,37 @@ +import time + + +class Timer(object): + """A simple timer.""" + def __init__(self): + self.total_time = 0. + self.calls = 0 + self.start_time = 0. + self.diff = 0. + self.average_time = 0. + + self.duration = 0. + + def tic(self): + # using time.time instead of time.clock because time time.clock + # does not normalize for multithreading + self.start_time = time.time() + + def toc(self, average=True): + self.diff = time.time() - self.start_time + self.total_time += self.diff + self.calls += 1 + self.average_time = self.total_time / self.calls + if average: + self.duration = self.average_time + else: + self.duration = self.diff + return self.duration + + def clear(self): + self.total_time = 0. + self.calls = 0 + self.start_time = 0. + self.diff = 0. + self.average_time = 0. + self.duration = 0. \ No newline at end of file diff --git a/yolox/utils/__init__.py b/yolox/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a268c1a4538ce568c8f9ef1c0d10511fdac34be1 --- /dev/null +++ b/yolox/utils/__init__.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + +from .allreduce_norm import * +from .boxes import * +from .checkpoint import load_ckpt, save_checkpoint +from .demo_utils import * +from .dist import * +from .ema import ModelEMA +from .logger import setup_logger +from .lr_scheduler import LRScheduler +from .metric import * +from .model_utils import * +from .setup_env import * +from .visualize import * diff --git a/yolox/utils/allreduce_norm.py b/yolox/utils/allreduce_norm.py new file mode 100644 index 0000000000000000000000000000000000000000..d9b51e2608e3ae3b82355cfc4593edb96213b520 --- /dev/null +++ b/yolox/utils/allreduce_norm.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. 
All rights reserved. + +import torch +from torch import distributed as dist +from torch import nn + +import pickle +from collections import OrderedDict + +from .dist import _get_global_gloo_group, get_world_size + +ASYNC_NORM = ( + nn.BatchNorm1d, + nn.BatchNorm2d, + nn.BatchNorm3d, + nn.InstanceNorm1d, + nn.InstanceNorm2d, + nn.InstanceNorm3d, +) + +__all__ = [ + "get_async_norm_states", + "pyobj2tensor", + "tensor2pyobj", + "all_reduce", + "all_reduce_norm", +] + + +def get_async_norm_states(module): + async_norm_states = OrderedDict() + for name, child in module.named_modules(): + if isinstance(child, ASYNC_NORM): + for k, v in child.state_dict().items(): + async_norm_states[".".join([name, k])] = v + return async_norm_states + + +def pyobj2tensor(pyobj, device="cuda"): + """serialize picklable python object to tensor""" + storage = torch.ByteStorage.from_buffer(pickle.dumps(pyobj)) + return torch.ByteTensor(storage).to(device=device) + + +def tensor2pyobj(tensor): + """deserialize tensor to picklable python object""" + return pickle.loads(tensor.cpu().numpy().tobytes()) + + +def _get_reduce_op(op_name): + return { + "sum": dist.ReduceOp.SUM, + "mean": dist.ReduceOp.SUM, + }[op_name.lower()] + + +def all_reduce(py_dict, op="sum", group=None): + """ + Apply all reduce function for python dict object. + NOTE: make sure that every py_dict has the same keys and values are in the same shape. + + Args: + py_dict (dict): dict to apply all reduce op. + op (str): operator, could be "sum" or "mean". + """ + world_size = get_world_size() + if world_size == 1: + return py_dict + if group is None: + group = _get_global_gloo_group() + if dist.get_world_size(group) == 1: + return py_dict + + # all reduce logic across different devices. + py_key = list(py_dict.keys()) + py_key_tensor = pyobj2tensor(py_key) + dist.broadcast(py_key_tensor, src=0) + py_key = tensor2pyobj(py_key_tensor) + + tensor_shapes = [py_dict[k].shape for k in py_key] + tensor_numels = [py_dict[k].numel() for k in py_key] + + flatten_tensor = torch.cat([py_dict[k].flatten() for k in py_key]) + dist.all_reduce(flatten_tensor, op=_get_reduce_op(op)) + if op == "mean": + flatten_tensor /= world_size + + split_tensors = [ + x.reshape(shape) + for x, shape in zip(torch.split(flatten_tensor, tensor_numels), tensor_shapes) + ] + return OrderedDict({k: v for k, v in zip(py_key, split_tensors)}) + + +def all_reduce_norm(module): + """ + All reduce norm statistics in different devices. + """ + states = get_async_norm_states(module) + states = all_reduce(states, op="mean") + module.load_state_dict(states, strict=False) diff --git a/yolox/utils/boxes.py b/yolox/utils/boxes.py new file mode 100644 index 0000000000000000000000000000000000000000..ac262b9608f85151e4bbeac3c7b02779dc63de75 --- /dev/null +++ b/yolox/utils/boxes.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
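+# Box-format conventions used below: "xyxy" is (x1, y1, x2, y2) corners, "xywh" is (top-left x, top-left y, width, height)
+# and "cxcywh" is (center x, center y, width, height); the conversion helpers at the end of this file modify boxes in place.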
+ +import numpy as np + +import torch +import torchvision +import torch.nn.functional as F + +__all__ = [ + "filter_box", + "postprocess", + "bboxes_iou", + "matrix_iou", + "adjust_box_anns", + "xyxy2xywh", + "xyxy2cxcywh", +] + + +def filter_box(output, scale_range): + """ + output: (N, 5+class) shape + """ + min_scale, max_scale = scale_range + w = output[:, 2] - output[:, 0] + h = output[:, 3] - output[:, 1] + keep = (w * h > min_scale * min_scale) & (w * h < max_scale * max_scale) + return output[keep] + + +def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45): + box_corner = prediction.new(prediction.shape) + box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 + box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 + box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 + box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 + prediction[:, :, :4] = box_corner[:, :, :4] + + output = [None for _ in range(len(prediction))] + for i, image_pred in enumerate(prediction): + + # If none are remaining => process next image + if not image_pred.size(0): + continue + # Get score and class with highest confidence + class_conf, class_pred = torch.max( + image_pred[:, 5 : 5 + num_classes], 1, keepdim=True + ) + + conf_mask = (image_pred[:, 4] * class_conf.squeeze() >= conf_thre).squeeze() + # _, conf_mask = torch.topk((image_pred[:, 4] * class_conf.squeeze()), 1000) + # Detections ordered as (x1, y1, x2, y2, obj_conf, class_conf, class_pred) + detections = torch.cat((image_pred[:, :5], class_conf, class_pred.float()), 1) + detections = detections[conf_mask] + if not detections.size(0): + continue + + nms_out_index = torchvision.ops.batched_nms( + detections[:, :4], + detections[:, 4] * detections[:, 5], + detections[:, 6], + nms_thre, + ) + detections = detections[nms_out_index] + if output[i] is None: + output[i] = detections + else: + output[i] = torch.cat((output[i], detections)) + + return output + + +def bboxes_iou(bboxes_a, bboxes_b, xyxy=True): + if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4: + raise IndexError + + if xyxy: + tl = torch.max(bboxes_a[:, None, :2], bboxes_b[:, :2]) + br = torch.min(bboxes_a[:, None, 2:], bboxes_b[:, 2:]) + area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1) + area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1) + else: + tl = torch.max( + (bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2), + (bboxes_b[:, :2] - bboxes_b[:, 2:] / 2), + ) + br = torch.min( + (bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2), + (bboxes_b[:, :2] + bboxes_b[:, 2:] / 2), + ) + + area_a = torch.prod(bboxes_a[:, 2:], 1) + area_b = torch.prod(bboxes_b[:, 2:], 1) + en = (tl < br).type(tl.type()).prod(dim=2) + area_i = torch.prod(br - tl, 2) * en # * ((tl < br).all()) + return area_i / (area_a[:, None] + area_b - area_i) + + +def matrix_iou(a, b): + """ + return iou of a and b, numpy version for data augenmentation + """ + lt = np.maximum(a[:, np.newaxis, :2], b[:, :2]) + rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) + + area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) + area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) + area_b = np.prod(b[:, 2:] - b[:, :2], axis=1) + return area_i / (area_a[:, np.newaxis] + area_b - area_i + 1e-12) + + +def adjust_box_anns(bbox, scale_ratio, padw, padh, w_max, h_max): + #bbox[:, 0::2] = np.clip(bbox[:, 0::2] * scale_ratio + padw, 0, w_max) + #bbox[:, 1::2] = np.clip(bbox[:, 1::2] * scale_ratio + padh, 0, h_max) + bbox[:, 0::2] = bbox[:, 0::2] * 
scale_ratio + padw + bbox[:, 1::2] = bbox[:, 1::2] * scale_ratio + padh + return bbox + + +def xyxy2xywh(bboxes): + bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0] + bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1] + return bboxes + + +def xyxy2cxcywh(bboxes): + bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0] + bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1] + bboxes[:, 0] = bboxes[:, 0] + bboxes[:, 2] * 0.5 + bboxes[:, 1] = bboxes[:, 1] + bboxes[:, 3] * 0.5 + return bboxes diff --git a/yolox/utils/checkpoint.py b/yolox/utils/checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..55903b4695b1926f76ced732797893702cf6387c --- /dev/null +++ b/yolox/utils/checkpoint.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. +from loguru import logger + +import torch + +import os +import shutil + + +def load_ckpt(model, ckpt): + model_state_dict = model.state_dict() + load_dict = {} + for key_model, v in model_state_dict.items(): + if key_model not in ckpt: + logger.warning( + "{} is not in the ckpt. Please double check and see if this is desired.".format( + key_model + ) + ) + continue + v_ckpt = ckpt[key_model] + if v.shape != v_ckpt.shape: + logger.warning( + "Shape of {} in checkpoint is {}, while shape of {} in model is {}.".format( + key_model, v_ckpt.shape, key_model, v.shape + ) + ) + continue + load_dict[key_model] = v_ckpt + + model.load_state_dict(load_dict, strict=False) + return model + + +def save_checkpoint(state, is_best, save_dir, model_name=""): + if not os.path.exists(save_dir): + os.makedirs(save_dir) + filename = os.path.join(save_dir, model_name + "_ckpt.pth.tar") + torch.save(state, filename) + if is_best: + best_filename = os.path.join(save_dir, "best_ckpt.pth.tar") + shutil.copyfile(filename, best_filename) diff --git a/yolox/utils/demo_utils.py b/yolox/utils/demo_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..093443cd568a2b0421fa707eb8fda97ec154b142 --- /dev/null +++ b/yolox/utils/demo_utils.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. 
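+# Pure-NumPy postprocessing helpers: nms/multiclass_nms perform greedy (per-class) suppression, and
+# demo_postprocess decodes grid-relative YOLOX outputs back to input-image coordinates using the stride of each prediction level.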
+ +import numpy as np + +import os + +__all__ = ["mkdir", "nms", "multiclass_nms", "demo_postprocess"] + + +def mkdir(path): + if not os.path.exists(path): + os.makedirs(path) + + +def nms(boxes, scores, nms_thr): + """Single class NMS implemented in Numpy.""" + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + + inds = np.where(ovr <= nms_thr)[0] + order = order[inds + 1] + + return keep + + +def multiclass_nms(boxes, scores, nms_thr, score_thr): + """Multiclass NMS implemented in Numpy""" + final_dets = [] + num_classes = scores.shape[1] + for cls_ind in range(num_classes): + cls_scores = scores[:, cls_ind] + valid_score_mask = cls_scores > score_thr + if valid_score_mask.sum() == 0: + continue + else: + valid_scores = cls_scores[valid_score_mask] + valid_boxes = boxes[valid_score_mask] + keep = nms(valid_boxes, valid_scores, nms_thr) + if len(keep) > 0: + cls_inds = np.ones((len(keep), 1)) * cls_ind + dets = np.concatenate( + [valid_boxes[keep], valid_scores[keep, None], cls_inds], 1 + ) + final_dets.append(dets) + if len(final_dets) == 0: + return None + return np.concatenate(final_dets, 0) + + +def demo_postprocess(outputs, img_size, p6=False): + + grids = [] + expanded_strides = [] + + if not p6: + strides = [8, 16, 32] + else: + strides = [8, 16, 32, 64] + + hsizes = [img_size[0] // stride for stride in strides] + wsizes = [img_size[1] // stride for stride in strides] + + for hsize, wsize, stride in zip(hsizes, wsizes, strides): + xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize)) + grid = np.stack((xv, yv), 2).reshape(1, -1, 2) + grids.append(grid) + shape = grid.shape[:2] + expanded_strides.append(np.full((*shape, 1), stride)) + + grids = np.concatenate(grids, 1) + expanded_strides = np.concatenate(expanded_strides, 1) + outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides + outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides + + return outputs diff --git a/yolox/utils/dist.py b/yolox/utils/dist.py new file mode 100644 index 0000000000000000000000000000000000000000..691c30690a5b4237cab23b9547cb106a1bd31dd7 --- /dev/null +++ b/yolox/utils/dist.py @@ -0,0 +1,255 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# This file mainly comes from +# https://github.com/facebookresearch/detectron2/blob/master/detectron2/utils/comm.py +# Copyright (c) Facebook, Inc. and its affiliates. +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. +""" +This file contains primitives for multi-gpu communication. +This is useful when doing distributed training. 
+""" + +import numpy as np + +import torch +from torch import distributed as dist + +import functools +import logging +import pickle +import time + +__all__ = [ + "is_main_process", + "synchronize", + "get_world_size", + "get_rank", + "get_local_rank", + "get_local_size", + "time_synchronized", + "gather", + "all_gather", +] + +_LOCAL_PROCESS_GROUP = None + + +def synchronize(): + """ + Helper function to synchronize (barrier) among all processes when using distributed training + """ + if not dist.is_available(): + return + if not dist.is_initialized(): + return + world_size = dist.get_world_size() + if world_size == 1: + return + dist.barrier() + + +def get_world_size() -> int: + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size() + + +def get_rank() -> int: + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + return dist.get_rank() + + +def get_local_rank() -> int: + """ + Returns: + The rank of the current process within the local (per-machine) process group. + """ + if not dist.is_available(): + return 0 + if not dist.is_initialized(): + return 0 + assert _LOCAL_PROCESS_GROUP is not None + return dist.get_rank(group=_LOCAL_PROCESS_GROUP) + + +def get_local_size() -> int: + """ + Returns: + The size of the per-machine process group, i.e. the number of processes per machine. + """ + if not dist.is_available(): + return 1 + if not dist.is_initialized(): + return 1 + return dist.get_world_size(group=_LOCAL_PROCESS_GROUP) + + +def is_main_process() -> bool: + return get_rank() == 0 + + +@functools.lru_cache() +def _get_global_gloo_group(): + """ + Return a process group based on gloo backend, containing all the ranks + The result is cached. + """ + if dist.get_backend() == "nccl": + return dist.new_group(backend="gloo") + else: + return dist.group.WORLD + + +def _serialize_to_tensor(data, group): + backend = dist.get_backend(group) + assert backend in ["gloo", "nccl"] + device = torch.device("cpu" if backend == "gloo" else "cuda") + + buffer = pickle.dumps(data) + if len(buffer) > 1024 ** 3: + logger = logging.getLogger(__name__) + logger.warning( + "Rank {} trying to all-gather {:.2f} GB of data on device {}".format( + get_rank(), len(buffer) / (1024 ** 3), device + ) + ) + storage = torch.ByteStorage.from_buffer(buffer) + tensor = torch.ByteTensor(storage).to(device=device) + return tensor + + +def _pad_to_largest_tensor(tensor, group): + """ + Returns: + list[int]: size of the tensor, on each rank + Tensor: padded tensor that has the max size + """ + world_size = dist.get_world_size(group=group) + assert ( + world_size >= 1 + ), "comm.gather/all_gather must be called from ranks within the given group!" + local_size = torch.tensor([tensor.numel()], dtype=torch.int64, device=tensor.device) + size_list = [ + torch.zeros([1], dtype=torch.int64, device=tensor.device) + for _ in range(world_size) + ] + dist.all_gather(size_list, local_size, group=group) + size_list = [int(size.item()) for size in size_list] + + max_size = max(size_list) + + # we pad the tensor because torch all_gather does not support + # gathering tensors of different shapes + if local_size != max_size: + padding = torch.zeros( + (max_size - local_size,), dtype=torch.uint8, device=tensor.device + ) + tensor = torch.cat((tensor, padding), dim=0) + return size_list, tensor + + +def all_gather(data, group=None): + """ + Run all_gather on arbitrary picklable data (not necessarily tensors). 
+ + Args: + data: any picklable object + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. + Returns: + list[data]: list of data gathered from each rank + """ + if get_world_size() == 1: + return [data] + if group is None: + group = _get_global_gloo_group() + if dist.get_world_size(group) == 1: + return [data] + + tensor = _serialize_to_tensor(data, group) + + size_list, tensor = _pad_to_largest_tensor(tensor, group) + max_size = max(size_list) + + # receiving Tensor from all ranks + tensor_list = [ + torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) + for _ in size_list + ] + dist.all_gather(tensor_list, tensor, group=group) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + + return data_list + + +def gather(data, dst=0, group=None): + """ + Run gather on arbitrary picklable data (not necessarily tensors). + + Args: + data: any picklable object + dst (int): destination rank + group: a torch process group. By default, will use a group which + contains all ranks on gloo backend. + + Returns: + list[data]: on dst, a list of data gathered from each rank. Otherwise, + an empty list. + """ + if get_world_size() == 1: + return [data] + if group is None: + group = _get_global_gloo_group() + if dist.get_world_size(group=group) == 1: + return [data] + rank = dist.get_rank(group=group) + + tensor = _serialize_to_tensor(data, group) + size_list, tensor = _pad_to_largest_tensor(tensor, group) + + # receiving Tensor from all ranks + if rank == dst: + max_size = max(size_list) + tensor_list = [ + torch.empty((max_size,), dtype=torch.uint8, device=tensor.device) + for _ in size_list + ] + dist.gather(tensor, tensor_list, dst=dst, group=group) + + data_list = [] + for size, tensor in zip(size_list, tensor_list): + buffer = tensor.cpu().numpy().tobytes()[:size] + data_list.append(pickle.loads(buffer)) + return data_list + else: + dist.gather(tensor, [], dst=dst, group=group) + return [] + + +def shared_random_seed(): + """ + Returns: + int: a random number that is the same across all workers. + If workers need a shared RNG, they can use this shared seed to + create one. + All workers must call this function, otherwise it will deadlock. + """ + ints = np.random.randint(2 ** 31) + all_ints = all_gather(ints) + return all_ints[0] + + +def time_synchronized(): + """pytorch-accurate time""" + if torch.cuda.is_available(): + torch.cuda.synchronize() + return time.time() diff --git a/yolox/utils/ema.py b/yolox/utils/ema.py new file mode 100644 index 0000000000000000000000000000000000000000..e0d09baf32e590aba97fc2b5aabf41a40549e55d --- /dev/null +++ b/yolox/utils/ema.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. +import torch +import torch.nn as nn + +import math +from copy import deepcopy + + +def is_parallel(model): + """check if model is in parallel mode.""" + + parallel_type = ( + nn.parallel.DataParallel, + nn.parallel.DistributedDataParallel, + ) + return isinstance(model, parallel_type) + + +def copy_attr(a, b, include=(), exclude=()): + # Copy attributes from b to a, options to only include [...] and to exclude [...] 
+ for k, v in b.__dict__.items():
+ if (len(include) and k not in include) or k.startswith("_") or k in exclude:
+ continue
+ else:
+ setattr(a, k, v)
+
+
+class ModelEMA:
+ """
+ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
+ Keep a moving average of everything in the model state_dict (parameters and buffers).
+ This is intended to allow functionality like
+ https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
+ A smoothed version of the weights is necessary for some training schemes to perform well.
+ This class is sensitive to where it is initialized in the sequence of model init,
+ GPU assignment and distributed training wrappers.
+ """
+
+ def __init__(self, model, decay=0.9999, updates=0):
+ """
+ Args:
+ model (nn.Module): model to apply EMA.
+ decay (float): ema decay rate.
+ updates (int): counter of EMA updates.
+ """
+ # Create EMA(FP32)
+ self.ema = deepcopy(model.module if is_parallel(model) else model).eval()
+ self.updates = updates
+ # decay exponential ramp (to help early epochs)
+ self.decay = lambda x: decay * (1 - math.exp(-x / 2000))
+ for p in self.ema.parameters():
+ p.requires_grad_(False)
+
+ def update(self, model):
+ # Update EMA parameters
+ with torch.no_grad():
+ self.updates += 1
+ d = self.decay(self.updates)
+
+ msd = (
+ model.module.state_dict() if is_parallel(model) else model.state_dict()
+ ) # model state_dict
+ for k, v in self.ema.state_dict().items():
+ if v.dtype.is_floating_point:
+ v *= d
+ v += (1.0 - d) * msd[k].detach()
+
+ def update_attr(self, model, include=(), exclude=("process_group", "reducer")):
+ # Update EMA attributes
+ copy_attr(self.ema, model, include, exclude)
diff --git a/yolox/utils/logger.py b/yolox/utils/logger.py
new file mode 100644
index 0000000000000000000000000000000000000000..4bd51d9ec6569c452b34c1cf60ff03044842c2ee
--- /dev/null
+++ b/yolox/utils/logger.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+
+from loguru import logger
+
+import inspect
+import os
+import sys
+
+
+def get_caller_name(depth=0):
+ """
+ Args:
+ depth (int): depth of the caller's context; use 0 for the immediate caller. Default value: 0.
+
+ Returns:
+ str: module name of the caller
+ """
+ # the following logic is a little bit faster than inspect.stack() logic
+ frame = inspect.currentframe().f_back
+ for _ in range(depth):
+ frame = frame.f_back
+
+ return frame.f_globals["__name__"]
+
+
+class StreamToLoguru:
+ """
+ Stream object that redirects writes to a logger instance.
+ """
+
+ def __init__(self, level="INFO", caller_names=("apex", "pycocotools")):
+ """
+ Args:
+ level(str): log level string of loguru. Default value: "INFO".
+ caller_names(tuple): caller names of redirected module.
+ Default value: (apex, pycocotools).
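+ Only writes originating from these modules are redirected to loguru;
+ everything else is passed through to the real `sys.__stdout__`.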
+ """ + self.level = level + self.linebuf = "" + self.caller_names = caller_names + + def write(self, buf): + full_name = get_caller_name(depth=1) + module_name = full_name.rsplit(".", maxsplit=-1)[0] + if module_name in self.caller_names: + for line in buf.rstrip().splitlines(): + # use caller level log + logger.opt(depth=2).log(self.level, line.rstrip()) + else: + sys.__stdout__.write(buf) + + def flush(self): + pass + + +def redirect_sys_output(log_level="INFO"): + redirect_logger = StreamToLoguru(log_level) + sys.stderr = redirect_logger + sys.stdout = redirect_logger + + +def setup_logger(save_dir, distributed_rank=0, filename="log.txt", mode="a"): + """setup logger for training and testing. + Args: + save_dir(str): location to save log file + distributed_rank(int): device rank when multi-gpu environment + filename (string): log save name. + mode(str): log file write mode, `append` or `override`. default is `a`. + + Return: + logger instance. + """ + loguru_format = ( + "{time:YYYY-MM-DD HH:mm:ss} | " + "{level: <8} | " + "{name}:{line} - {message}" + ) + + logger.remove() + save_file = os.path.join(save_dir, filename) + if mode == "o" and os.path.exists(save_file): + os.remove(save_file) + # only keep logger in rank0 process + if distributed_rank == 0: + logger.add( + sys.stderr, + format=loguru_format, + level="INFO", + enqueue=True, + ) + logger.add(save_file) + + # redirect stdout/stderr to loguru + redirect_sys_output("INFO") diff --git a/yolox/utils/lr_scheduler.py b/yolox/utils/lr_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..8f85c230d26d82b667843aac82d795b3d3b7526a --- /dev/null +++ b/yolox/utils/lr_scheduler.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + +import math +from functools import partial + + +class LRScheduler: + def __init__(self, name, lr, iters_per_epoch, total_epochs, **kwargs): + """ + Supported lr schedulers: [cos, warmcos, multistep] + + Args: + lr (float): learning rate. + iters_per_peoch (int): number of iterations in one epoch. + total_epochs (int): number of epochs in training. 
+ kwargs (dict): + - cos: None + - warmcos: [warmup_epochs, warmup_lr_start (default 1e-6)] + - multistep: [milestones (epochs), gamma (default 0.1)] + """ + + self.lr = lr + self.iters_per_epoch = iters_per_epoch + self.total_epochs = total_epochs + self.total_iters = iters_per_epoch * total_epochs + + self.__dict__.update(kwargs) + + self.lr_func = self._get_lr_func(name) + + def update_lr(self, iters): + return self.lr_func(iters) + + def _get_lr_func(self, name): + if name == "cos": # cosine lr schedule + lr_func = partial(cos_lr, self.lr, self.total_iters) + elif name == "warmcos": + warmup_total_iters = self.iters_per_epoch * self.warmup_epochs + warmup_lr_start = getattr(self, "warmup_lr_start", 1e-6) + lr_func = partial( + warm_cos_lr, + self.lr, + self.total_iters, + warmup_total_iters, + warmup_lr_start, + ) + elif name == "yoloxwarmcos": + warmup_total_iters = self.iters_per_epoch * self.warmup_epochs + no_aug_iters = self.iters_per_epoch * self.no_aug_epochs + warmup_lr_start = getattr(self, "warmup_lr_start", 0) + min_lr_ratio = getattr(self, "min_lr_ratio", 0.2) + lr_func = partial( + yolox_warm_cos_lr, + self.lr, + min_lr_ratio, + self.total_iters, + warmup_total_iters, + warmup_lr_start, + no_aug_iters, + ) + elif name == "yoloxsemiwarmcos": + warmup_lr_start = getattr(self, "warmup_lr_start", 0) + min_lr_ratio = getattr(self, "min_lr_ratio", 0.2) + warmup_total_iters = self.iters_per_epoch * self.warmup_epochs + no_aug_iters = self.iters_per_epoch * self.no_aug_epochs + normal_iters = self.iters_per_epoch * self.semi_epoch + semi_iters = self.iters_per_epoch_semi * ( + self.total_epochs - self.semi_epoch - self.no_aug_epochs + ) + lr_func = partial( + yolox_semi_warm_cos_lr, + self.lr, + min_lr_ratio, + warmup_lr_start, + self.total_iters, + normal_iters, + no_aug_iters, + warmup_total_iters, + semi_iters, + self.iters_per_epoch, + self.iters_per_epoch_semi, + ) + elif name == "multistep": # stepwise lr schedule + milestones = [ + int(self.total_iters * milestone / self.total_epochs) + for milestone in self.milestones + ] + gamma = getattr(self, "gamma", 0.1) + lr_func = partial(multistep_lr, self.lr, milestones, gamma) + else: + raise ValueError("Scheduler version {} not supported.".format(name)) + return lr_func + + +def cos_lr(lr, total_iters, iters): + """Cosine learning rate""" + lr *= 0.5 * (1.0 + math.cos(math.pi * iters / total_iters)) + return lr + + +def warm_cos_lr(lr, total_iters, warmup_total_iters, warmup_lr_start, iters): + """Cosine learning rate with warm up.""" + if iters <= warmup_total_iters: + lr = (lr - warmup_lr_start) * iters / float( + warmup_total_iters + ) + warmup_lr_start + else: + lr *= 0.5 * ( + 1.0 + + math.cos( + math.pi + * (iters - warmup_total_iters) + / (total_iters - warmup_total_iters) + ) + ) + return lr + + +def yolox_warm_cos_lr( + lr, + min_lr_ratio, + total_iters, + warmup_total_iters, + warmup_lr_start, + no_aug_iter, + iters, +): + """Cosine learning rate with warm up.""" + min_lr = lr * min_lr_ratio + if iters <= warmup_total_iters: + # lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start + lr = (lr - warmup_lr_start) * pow( + iters / float(warmup_total_iters), 2 + ) + warmup_lr_start + elif iters >= total_iters - no_aug_iter: + lr = min_lr + else: + lr = min_lr + 0.5 * (lr - min_lr) * ( + 1.0 + + math.cos( + math.pi + * (iters - warmup_total_iters) + / (total_iters - warmup_total_iters - no_aug_iter) + ) + ) + return lr + + +def yolox_semi_warm_cos_lr( + lr, + min_lr_ratio, + warmup_lr_start, + 
total_iters, + normal_iters, + no_aug_iters, + warmup_total_iters, + semi_iters, + iters_per_epoch, + iters_per_epoch_semi, + iters, +): + """Cosine learning rate with warm up.""" + min_lr = lr * min_lr_ratio + if iters <= warmup_total_iters: + # lr = (lr - warmup_lr_start) * iters / float(warmup_total_iters) + warmup_lr_start + lr = (lr - warmup_lr_start) * pow( + iters / float(warmup_total_iters), 2 + ) + warmup_lr_start + elif iters >= normal_iters + semi_iters: + lr = min_lr + elif iters <= normal_iters: + lr = min_lr + 0.5 * (lr - min_lr) * ( + 1.0 + + math.cos( + math.pi + * (iters - warmup_total_iters) + / (total_iters - warmup_total_iters - no_aug_iters) + ) + ) + else: + lr = min_lr + 0.5 * (lr - min_lr) * ( + 1.0 + + math.cos( + math.pi + * ( + normal_iters + - warmup_total_iters + + (iters - normal_iters) + * iters_per_epoch + * 1.0 + / iters_per_epoch_semi + ) + / (total_iters - warmup_total_iters - no_aug_iters) + ) + ) + return lr + + +def multistep_lr(lr, milestones, gamma, iters): + """MultiStep learning rate""" + for milestone in milestones: + lr *= gamma if iters >= milestone else 1.0 + return lr diff --git a/yolox/utils/metric.py b/yolox/utils/metric.py new file mode 100644 index 0000000000000000000000000000000000000000..4840b8dd0e97d26891fb8c515b6999cf35bd9544 --- /dev/null +++ b/yolox/utils/metric.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. +import numpy as np + +import torch + +import functools +import os +import time +from collections import defaultdict, deque + +__all__ = [ + "AverageMeter", + "MeterBuffer", + "get_total_and_free_memory_in_Mb", + "occupy_mem", + "gpu_mem_usage", +] + + +def get_total_and_free_memory_in_Mb(cuda_device): + devices_info_str = os.popen( + "nvidia-smi --query-gpu=memory.total,memory.used --format=csv,nounits,noheader" + ) + devices_info = devices_info_str.read().strip().split("\n") + total, used = devices_info[int(cuda_device)].split(",") + return int(total), int(used) + + +def occupy_mem(cuda_device, mem_ratio=0.95): + """ + pre-allocate gpu memory for training to avoid memory Fragmentation. + """ + total, used = get_total_and_free_memory_in_Mb(cuda_device) + max_mem = int(total * mem_ratio) + block_mem = max_mem - used + x = torch.cuda.FloatTensor(256, 1024, block_mem) + del x + time.sleep(5) + + +def gpu_mem_usage(): + """ + Compute the GPU memory usage for the current device (MB). + """ + mem_usage_bytes = torch.cuda.max_memory_allocated() + return mem_usage_bytes / (1024 * 1024) + + +class AverageMeter: + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size=50): + self._deque = deque(maxlen=window_size) + self._total = 0.0 + self._count = 0 + + def update(self, value): + self._deque.append(value) + self._count += 1 + self._total += value + + @property + def median(self): + d = np.array(list(self._deque)) + return np.median(d) + + @property + def avg(self): + # if deque is empty, nan will be returned. 
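+ # avg/median cover the sliding window only; use global_avg for the whole run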
+ d = np.array(list(self._deque)) + return d.mean() + + @property + def global_avg(self): + return self._total / max(self._count, 1e-5) + + @property + def latest(self): + return self._deque[-1] if len(self._deque) > 0 else None + + @property + def total(self): + return self._total + + def reset(self): + self._deque.clear() + self._total = 0.0 + self._count = 0 + + def clear(self): + self._deque.clear() + + +class MeterBuffer(defaultdict): + """Computes and stores the average and current value""" + + def __init__(self, window_size=20): + factory = functools.partial(AverageMeter, window_size=window_size) + super().__init__(factory) + + def reset(self): + for v in self.values(): + v.reset() + + def get_filtered_meter(self, filter_key="time"): + return {k: v for k, v in self.items() if filter_key in k} + + def update(self, values=None, **kwargs): + if values is None: + values = {} + values.update(kwargs) + for k, v in values.items(): + if isinstance(v, torch.Tensor): + v = v.detach() + self[k].update(v) + + def clear_meters(self): + for v in self.values(): + v.clear() diff --git a/yolox/utils/model_utils.py b/yolox/utils/model_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..b4e581211e7842cfffdd4c977577e8110b8d05b5 --- /dev/null +++ b/yolox/utils/model_utils.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python3 +# -*- coding:utf-8 -*- +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + +import torch +import torch.nn as nn +from thop import profile + +from copy import deepcopy + +__all__ = [ + "fuse_conv_and_bn", + "fuse_model", + "get_model_info", + "replace_module", +] + + +def get_model_info(model, tsize): + + stride = 64 + img = torch.zeros((1, 3, stride, stride), device=next(model.parameters()).device) + flops, params = profile(deepcopy(model), inputs=(img,), verbose=False) + params /= 1e6 + flops /= 1e9 + flops *= tsize[0] * tsize[1] / stride / stride * 2 # Gflops + info = "Params: {:.2f}M, Gflops: {:.2f}".format(params, flops) + return info + + +def fuse_conv_and_bn(conv, bn): + # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ + fusedconv = ( + nn.Conv2d( + conv.in_channels, + conv.out_channels, + kernel_size=conv.kernel_size, + stride=conv.stride, + padding=conv.padding, + groups=conv.groups, + bias=True, + ) + .requires_grad_(False) + .to(conv.weight.device) + ) + + # prepare filters + w_conv = conv.weight.clone().view(conv.out_channels, -1) + w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) + fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape)) + + # prepare spatial bias + b_conv = ( + torch.zeros(conv.weight.size(0), device=conv.weight.device) + if conv.bias is None + else conv.bias + ) + b_bn = bn.bias - bn.weight.mul(bn.running_mean).div( + torch.sqrt(bn.running_var + bn.eps) + ) + fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) + + return fusedconv + + +def fuse_model(model): + from yolox.models.network_blocks import BaseConv + + for m in model.modules(): + if type(m) is BaseConv and hasattr(m, "bn"): + m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv + delattr(m, "bn") # remove batchnorm + m.forward = m.fuseforward # update forward + return model + + +def replace_module(module, replaced_module_type, new_module_type, replace_func=None): + """ + Replace given type in module to a new type. mostly used in deploy. + + Args: + module (nn.Module): model to apply replace operation. 
+ replaced_module_type (Type): module type to be replaced.
+ new_module_type (Type): module type used as the replacement.
+ replace_func (function): python function to describe replace logic. Default value: None.
+
+ Returns:
+ model (nn.Module): module with the replacement applied.
+ """
+
+ def default_replace_func(replaced_module_type, new_module_type):
+ return new_module_type()
+
+ if replace_func is None:
+ replace_func = default_replace_func
+
+ model = module
+ if isinstance(module, replaced_module_type):
+ model = replace_func(replaced_module_type, new_module_type)
+ else: # recursively replace
+ for name, child in module.named_children():
+ # pass replace_func down so custom replace logic also applies to nested children
+ new_child = replace_module(child, replaced_module_type, new_module_type, replace_func)
+ if new_child is not child: # child is already replaced
+ model.add_module(name, new_child)
+
+ return model
diff --git a/yolox/utils/setup_env.py b/yolox/utils/setup_env.py
new file mode 100644
index 0000000000000000000000000000000000000000..f282b1f6dc6f5c2d7a4e8de468e624a721bca94c
--- /dev/null
+++ b/yolox/utils/setup_env.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+
+import cv2
+
+import os
+import subprocess
+
+__all__ = ["configure_nccl", "configure_module"]
+
+
+def configure_nccl():
+ """Configure multi-machine environment variables of NCCL."""
+ os.environ["NCCL_LAUNCH_MODE"] = "PARALLEL"
+ os.environ["NCCL_IB_HCA"] = subprocess.getoutput(
+ "pushd /sys/class/infiniband/ > /dev/null; for i in mlx5_*; "
+ "do cat $i/ports/1/gid_attrs/types/* 2>/dev/null "
+ "| grep v >/dev/null && echo $i ; done; popd > /dev/null"
+ )
+ os.environ["NCCL_IB_GID_INDEX"] = "3"
+ os.environ["NCCL_IB_TC"] = "106"
+
+
+def configure_module(ulimit_value=8192):
+ """
+ Configure the PyTorch runtime environment: raise the open-file ulimit and disable cv2 threading/OpenCL.
+
+ Args:
+ ulimit_value(int): open-file limit to request on Linux. Default value: 8192.
+ """
+ # system setting
+ try:
+ import resource
+
+ rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
+ resource.setrlimit(resource.RLIMIT_NOFILE, (ulimit_value, rlimit[1]))
+ except Exception:
+ # The call may fail on Windows or when the value exceeds the hard limit;
+ # raising the limit is not strictly required, so the failure is ignored.
+ pass
+
+ # cv2
+ # cv2 multithreading can interfere with torch DataLoader worker processes
+ os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled"
+ try:
+ cv2.setNumThreads(0)
+ cv2.ocl.setUseOpenCL(False)
+ except Exception:
+ # older cv2 versions may raise exceptions here.
+ pass
diff --git a/yolox/utils/visualize.py b/yolox/utils/visualize.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d02d474d289df7bf3a9c43a707f403c1858f950
--- /dev/null
+++ b/yolox/utils/visualize.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
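+# Drawing helpers: `vis` overlays per-class detection boxes and scores, `plot_tracking`
+# draws MOT boxes with per-identity colors, and `_COLORS` is an RGB palette in [0, 1].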
+ +import cv2 +import numpy as np + +__all__ = ["vis"] + + +def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None): + + for i in range(len(boxes)): + box = boxes[i] + cls_id = int(cls_ids[i]) + score = scores[i] + if score < conf: + continue + x0 = int(box[0]) + y0 = int(box[1]) + x1 = int(box[2]) + y1 = int(box[3]) + + color = (_COLORS[cls_id] * 255).astype(np.uint8).tolist() + text = '{}:{:.1f}%'.format(class_names[cls_id], score * 100) + txt_color = (0, 0, 0) if np.mean(_COLORS[cls_id]) > 0.5 else (255, 255, 255) + font = cv2.FONT_HERSHEY_SIMPLEX + + txt_size = cv2.getTextSize(text, font, 0.4, 1)[0] + cv2.rectangle(img, (x0, y0), (x1, y1), color, 2) + + txt_bk_color = (_COLORS[cls_id] * 255 * 0.7).astype(np.uint8).tolist() + cv2.rectangle( + img, + (x0, y0 + 1), + (x0 + txt_size[0] + 1, y0 + int(1.5*txt_size[1])), + txt_bk_color, + -1 + ) + cv2.putText(img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1) + + return img + + +def get_color(idx): + idx = idx * 3 + color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255) + + return color + + +def plot_tracking(image, tlwhs, obj_ids, scores=None, frame_id=0, fps=0., ids2=None): + im = np.ascontiguousarray(np.copy(image)) + im_h, im_w = im.shape[:2] + + top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255 + + #text_scale = max(1, image.shape[1] / 1600.) + #text_thickness = 2 + #line_thickness = max(1, int(image.shape[1] / 500.)) + text_scale = 2 + text_thickness = 2 + line_thickness = 3 + + radius = max(5, int(im_w/140.)) + cv2.putText(im, 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)), + (0, int(15 * text_scale)), cv2.FONT_HERSHEY_PLAIN, 2, (0, 0, 255), thickness=2) + + for i, tlwh in enumerate(tlwhs): + x1, y1, w, h = tlwh + intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h))) + obj_id = int(obj_ids[i]) + id_text = '{}'.format(int(obj_id)) + if ids2 is not None: + id_text = id_text + ', {}'.format(int(ids2[i])) + color = get_color(abs(obj_id)) + cv2.rectangle(im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness) + cv2.putText(im, id_text, (intbox[0], intbox[1]), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 0, 255), + thickness=text_thickness) + return im + + +_COLORS = np.array( + [ + 0.000, 0.447, 0.741, + 0.850, 0.325, 0.098, + 0.929, 0.694, 0.125, + 0.494, 0.184, 0.556, + 0.466, 0.674, 0.188, + 0.301, 0.745, 0.933, + 0.635, 0.078, 0.184, + 0.300, 0.300, 0.300, + 0.600, 0.600, 0.600, + 1.000, 0.000, 0.000, + 1.000, 0.500, 0.000, + 0.749, 0.749, 0.000, + 0.000, 1.000, 0.000, + 0.000, 0.000, 1.000, + 0.667, 0.000, 1.000, + 0.333, 0.333, 0.000, + 0.333, 0.667, 0.000, + 0.333, 1.000, 0.000, + 0.667, 0.333, 0.000, + 0.667, 0.667, 0.000, + 0.667, 1.000, 0.000, + 1.000, 0.333, 0.000, + 1.000, 0.667, 0.000, + 1.000, 1.000, 0.000, + 0.000, 0.333, 0.500, + 0.000, 0.667, 0.500, + 0.000, 1.000, 0.500, + 0.333, 0.000, 0.500, + 0.333, 0.333, 0.500, + 0.333, 0.667, 0.500, + 0.333, 1.000, 0.500, + 0.667, 0.000, 0.500, + 0.667, 0.333, 0.500, + 0.667, 0.667, 0.500, + 0.667, 1.000, 0.500, + 1.000, 0.000, 0.500, + 1.000, 0.333, 0.500, + 1.000, 0.667, 0.500, + 1.000, 1.000, 0.500, + 0.000, 0.333, 1.000, + 0.000, 0.667, 1.000, + 0.000, 1.000, 1.000, + 0.333, 0.000, 1.000, + 0.333, 0.333, 1.000, + 0.333, 0.667, 1.000, + 0.333, 1.000, 1.000, + 0.667, 0.000, 1.000, + 0.667, 0.333, 1.000, + 0.667, 0.667, 1.000, + 0.667, 1.000, 1.000, + 1.000, 0.000, 1.000, + 1.000, 0.333, 1.000, + 1.000, 0.667, 1.000, + 0.333, 0.000, 0.000, + 0.500, 0.000, 0.000, + 0.667, 0.000, 0.000, + 0.833, 0.000, 0.000, + 1.000, 
0.000, 0.000, + 0.000, 0.167, 0.000, + 0.000, 0.333, 0.000, + 0.000, 0.500, 0.000, + 0.000, 0.667, 0.000, + 0.000, 0.833, 0.000, + 0.000, 1.000, 0.000, + 0.000, 0.000, 0.167, + 0.000, 0.000, 0.333, + 0.000, 0.000, 0.500, + 0.000, 0.000, 0.667, + 0.000, 0.000, 0.833, + 0.000, 0.000, 1.000, + 0.000, 0.000, 0.000, + 0.143, 0.143, 0.143, + 0.286, 0.286, 0.286, + 0.429, 0.429, 0.429, + 0.571, 0.571, 0.571, + 0.714, 0.714, 0.714, + 0.857, 0.857, 0.857, + 0.000, 0.447, 0.741, + 0.314, 0.717, 0.741, + 0.50, 0.5, 0 + ] +).astype(np.float32).reshape(-1, 3)
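The metering and visualization helpers above are consumed by the training and demo scripts elsewhere in the patch. As a quick standalone sanity check of `AverageMeter` and `vis` (an illustrative sketch, not part of this diff; it assumes the `yolox` package added above is importable and that numpy and OpenCV are installed):

import numpy as np
from yolox.utils.metric import AverageMeter
from yolox.utils.visualize import vis

# two fake detections on a blank 640x640 frame; the 0.4-score box falls below conf=0.5 and is skipped
img = np.zeros((640, 640, 3), dtype=np.uint8)
boxes = np.array([[50, 60, 200, 300], [300, 320, 500, 600]], dtype=np.float32)
scores = np.array([0.9, 0.4], dtype=np.float32)
cls_ids = np.array([0, 1])
img = vis(img, boxes, scores, cls_ids, conf=0.5, class_names=["person", "car"])

# smoothed timing, the way MeterBuffer above wraps it
meter = AverageMeter(window_size=3)
for t in (0.10, 0.12, 0.30, 0.11):
    meter.update(t)
print(meter.avg, meter.global_avg)  # mean over the last 3 values vs. mean over all 4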