---
tags:
- vision
---

# DUSt3R

## Model info

DUSt3R takes a pair of RGB images and directly regresses dense 3D pointmaps, from which depth, camera parameters and 2D matches can be recovered; see the paper [DUSt3R: Geometric 3D Vision Made Easy](https://arxiv.org/abs/2312.14132) for details.

Project page: https://dust3r.europe.naverlabs.com/

## How to use

Here's how to load the model (after [installing](https://github.com/naver/dust3r?tab=readme-ov-file#installation) the dust3r package):

```python
import torch

from dust3r.model import AsymmetricCroCo3DStereo

model = AsymmetricCroCo3DStereo.from_pretrained("nielsr/DUSt3R_ViTLarge_BaseDecoder_512_dpt_bis")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
```
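
Since the snippet below only runs inference, it can also help to switch the model to evaluation mode first; this is standard PyTorch practice rather than anything dust3r-specific:

```python
model.eval()  # standard PyTorch: disable train-time behaviors such as dropout
```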

Next, one can run inference as follows:

```python
from dust3r.inference import inference
from dust3r.utils.image import load_images
from dust3r.image_pairs import make_pairs
from dust3r.cloud_opt import global_aligner, GlobalAlignerMode

if __name__ == '__main__':
    batch_size = 1
    schedule = 'cosine'
    lr = 0.01
    niter = 300

    # load_images can take a list of images or a directory
    images = load_images(['croco/assets/Chateau1.png', 'croco/assets/Chateau2.png'], size=512)
    pairs = make_pairs(images, scene_graph='complete', prefilter=None, symmetrize=True)
    output = inference(pairs, model, device, batch_size=batch_size)

    # at this stage, you have the raw dust3r predictions
    view1, pred1 = output['view1'], output['pred1']
    view2, pred2 = output['view2'], output['pred2']
    # here, view1, pred1, view2, pred2 are dicts of lists of len(2)
    # -> because we symmetrize we have (im1, im2) and (im2, im1) pairs
    # in each view you have:
    # an integer image identifier: view1['idx'] and view2['idx']
    # the img: view1['img'] and view2['img']
    # the image shape: view1['true_shape'] and view2['true_shape']
    # an instance string output by the dataloader: view1['instance'] and view2['instance']
    # pred1 and pred2 contain the confidence values: pred1['conf'] and pred2['conf']
    # pred1 contains 3D points for view1['img'] in view1['img'] space: pred1['pts3d']
    # pred2 contains 3D points for view2['img'] in view1['img'] space: pred2['pts3d_in_other_view']
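    # note that pred1['pts3d'] and pred2['pts3d_in_other_view'] live in the same
    # frame (view1's camera): this shared frame is what makes the 3D-based
    # matching further below possible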

    # next we'll use the global_aligner to align the predictions
    # depending on your task, you may be fine with the raw output and not need it
    # with only two input images, you could use GlobalAlignerMode.PairViewer: it would just convert the output
    # if using GlobalAlignerMode.PairViewer, no need to run compute_global_alignment
    scene = global_aligner(output, device=device, mode=GlobalAlignerMode.PointCloudOptimizer)
    loss = scene.compute_global_alignment(init="mst", niter=niter, schedule=schedule, lr=lr)
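    # (niter, schedule and lr defined above drive this optimization loop;
    # the returned loss is the final value of the alignment objective)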

    # retrieve useful values from scene:
    imgs = scene.imgs
    focals = scene.get_focals()
    poses = scene.get_im_poses()
    pts3d = scene.get_pts3d()
    confidence_masks = scene.get_masks()
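    # informal note on what these hold: imgs is a list of H x W x 3 numpy images,
    # pts3d a list of H x W x 3 point tensors in a common world frame,
    # confidence_masks a list of boolean H x W masks, and poses the per-image
    # camera-to-world 4x4 matrices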

    # visualize reconstruction
    scene.show()

    # find 2D-2D matches between the two images
    from dust3r.utils.geometry import find_reciprocal_matches, xy_grid
    pts2d_list, pts3d_list = [], []
    for i in range(2):
        conf_i = confidence_masks[i].cpu().numpy()
        pts2d_list.append(xy_grid(*imgs[i].shape[:2][::-1])[conf_i])  # imgs[i].shape[:2] = (H, W)
        pts3d_list.append(pts3d[i].detach().cpu().numpy()[conf_i])
    reciprocal_in_P2, nn2_in_P1, num_matches = find_reciprocal_matches(*pts3d_list)
    print(f'found {num_matches} matches')
    matches_im1 = pts2d_list[1][reciprocal_in_P2]
    matches_im0 = pts2d_list[0][nn2_in_P1][reciprocal_in_P2]

    # visualize a few matches
    import numpy as np
    from matplotlib import pyplot as pl
    n_viz = 10
    match_idx_to_viz = np.round(np.linspace(0, num_matches - 1, n_viz)).astype(int)
    viz_matches_im0, viz_matches_im1 = matches_im0[match_idx_to_viz], matches_im1[match_idx_to_viz]

    H0, W0, H1, W1 = *imgs[0].shape[:2], *imgs[1].shape[:2]
    img0 = np.pad(imgs[0], ((0, max(H1 - H0, 0)), (0, 0), (0, 0)), 'constant', constant_values=0)
    img1 = np.pad(imgs[1], ((0, max(H0 - H1, 0)), (0, 0), (0, 0)), 'constant', constant_values=0)
    img = np.concatenate((img0, img1), axis=1)
    pl.figure()
    pl.imshow(img)
    cmap = pl.get_cmap('jet')
    for i in range(n_viz):
        (x0, y0), (x1, y1) = viz_matches_im0[i].T, viz_matches_im1[i].T
        pl.plot([x0, x1 + W0], [y0, y1], '-+', color=cmap(i / (n_viz - 1)), scalex=False, scaley=False)
    pl.show(block=True)
```
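
The aligner returns per-image focal lengths but not full intrinsics matrices. If you need one, here is a minimal sketch under the common assumption of a centered principal point (this assumption, and the helper name `intrinsics_from_focal`, are ours for illustration and not part of the dust3r API; `focals` and `imgs` come from the snippet above):

```python
import numpy as np

def intrinsics_from_focal(focal, height, width):
    # pinhole K with square pixels and the principal point assumed to be at the
    # image center (illustrative assumption, not guaranteed by the library)
    return np.array([
        [float(focal), 0.0,          width / 2.0],
        [0.0,          float(focal), height / 2.0],
        [0.0,          0.0,          1.0],
    ])

# imgs[i].shape[:2] is (H, W), matching the helper's (height, width) signature
Ks = [intrinsics_from_focal(float(f), *img.shape[:2]) for f, img in zip(focals, imgs)]
```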

## BibTeX entry and citation info

```bibtex
@article{dust3r2023,
  title={{DUSt3R: Geometric 3D Vision Made Easy}},
  author={Wang, Shuzhe and Leroy, Vincent and Cabon, Yohann and Chidlovskii, Boris and Revaud, Jerome},
  journal={arXiv preprint arXiv:2312.14132},
  year={2023}
}
```