---
tags:
- vision
---

# DUSt3R

## Model info

DUSt3R ("Geometric 3D Vision Made Easy") performs dense 3D reconstruction from unconstrained image pairs. As the checkpoint name suggests, this variant uses a ViT-Large encoder, a Base decoder and a DPT head, operating at a 512-pixel resolution.

Project page: https://dust3r.europe.naverlabs.com/

Paper: https://arxiv.org/abs/2312.14132

## How to use

Here's how to load the model (after [installing](https://github.com/naver/dust3r?tab=readme-ov-file#installation) the dust3r package):

```python
import torch
from dust3r.model import AsymmetricCroCo3DStereo

# download the checkpoint from the Hugging Face Hub
model = AsymmetricCroCo3DStereo.from_pretrained("nielsr/DUSt3R_ViTLarge_BaseDecoder_512_dpt_bis")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
```
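
To sanity-check the load, the model behaves like any regular PyTorch module (an optional snippet, not specific to dust3r):

```python
# count parameters and switch normalization/dropout layers to inference mode
n_params = sum(p.numel() for p in model.parameters())
print(f"loaded {type(model).__name__} with {n_params / 1e6:.1f}M parameters")
model.eval()
```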

Next, one can run inference as follows:

```python
from dust3r.inference import inference
from dust3r.utils.image import load_images
from dust3r.image_pairs import make_pairs
from dust3r.cloud_opt import global_aligner, GlobalAlignerMode

if __name__ == '__main__':
    batch_size = 1
    schedule = 'cosine'
    lr = 0.01
    niter = 300

    # load_images can take a list of images or a directory
    images = load_images(['croco/assets/Chateau1.png', 'croco/assets/Chateau2.png'], size=512)
    pairs = make_pairs(images, scene_graph='complete', prefilter=None, symmetrize=True)
    output = inference(pairs, model, device, batch_size=batch_size)

    # at this stage, you have the raw dust3r predictions
    view1, pred1 = output['view1'], output['pred1']
    view2, pred2 = output['view2'], output['pred2']
    # here, view1, pred1, view2, pred2 are dicts of lists of len(2)
    #  -> because we symmetrize we have (im1, im2) and (im2, im1) pairs
    # in each view you have:
    #  an integer image identifier: view1['idx'] and view2['idx']
    #  the img: view1['img'] and view2['img']
    #  the image shape: view1['true_shape'] and view2['true_shape']
    #  an instance string output by the dataloader: view1['instance'] and view2['instance']
    # pred1 and pred2 contain the confidence values: pred1['conf'] and pred2['conf']
    # pred1 contains 3D points for view1['img'] in view1['img'] space: pred1['pts3d']
    # pred2 contains 3D points for view2['img'] in view1['img'] space: pred2['pts3d_in_other_view']

    # next we'll use the global_aligner to align the predictions
    # depending on your task, you may be fine with the raw output and not need it
    # with only two input images, you could use GlobalAlignerMode.PairViewer: it would just convert the output
    # if using GlobalAlignerMode.PairViewer, no need to run compute_global_alignment
    scene = global_aligner(output, device=device, mode=GlobalAlignerMode.PointCloudOptimizer)
    loss = scene.compute_global_alignment(init="mst", niter=niter, schedule=schedule, lr=lr)

    # retrieve useful values from scene:
    imgs = scene.imgs
    focals = scene.get_focals()
    poses = scene.get_im_poses()
    pts3d = scene.get_pts3d()
    confidence_masks = scene.get_masks()

    # visualize reconstruction
    scene.show()

    # find 2D-2D matches between the two images
    from dust3r.utils.geometry import find_reciprocal_matches, xy_grid
    pts2d_list, pts3d_list = [], []
    for i in range(2):
        conf_i = confidence_masks[i].cpu().numpy()
        pts2d_list.append(xy_grid(*imgs[i].shape[:2][::-1])[conf_i])  # imgs[i].shape[:2] = (H, W)
        pts3d_list.append(pts3d[i].detach().cpu().numpy()[conf_i])
    reciprocal_in_P2, nn2_in_P1, num_matches = find_reciprocal_matches(*pts3d_list)
    print(f'found {num_matches} matches')
    matches_im1 = pts2d_list[1][reciprocal_in_P2]
    matches_im0 = pts2d_list[0][nn2_in_P1][reciprocal_in_P2]

    # visualize a few matches
    import numpy as np
    from matplotlib import pyplot as pl
    n_viz = 10
    match_idx_to_viz = np.round(np.linspace(0, num_matches - 1, n_viz)).astype(int)
    viz_matches_im0, viz_matches_im1 = matches_im0[match_idx_to_viz], matches_im1[match_idx_to_viz]

    H0, W0, H1, W1 = *imgs[0].shape[:2], *imgs[1].shape[:2]
    img0 = np.pad(imgs[0], ((0, max(H1 - H0, 0)), (0, 0), (0, 0)), 'constant', constant_values=0)
    img1 = np.pad(imgs[1], ((0, max(H0 - H1, 0)), (0, 0), (0, 0)), 'constant', constant_values=0)
    img = np.concatenate((img0, img1), axis=1)
    pl.figure()
    pl.imshow(img)
    cmap = pl.get_cmap('jet')
    for i in range(n_viz):
        (x0, y0), (x1, y1) = viz_matches_im0[i].T, viz_matches_im1[i].T
        pl.plot([x0, x1 + W0], [y0, y1], '-+', color=cmap(i / (n_viz - 1)), scalex=False, scaley=False)
    pl.show(block=True)
```
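
As the comments above note, with only two input images you can skip the optimization and use `GlobalAlignerMode.PairViewer`, which simply converts the raw pair predictions. A minimal sketch, reusing `output` and `device` from the script above:

```python
from dust3r.cloud_opt import global_aligner, GlobalAlignerMode

# PairViewer just converts the two-view output;
# there is no need to call compute_global_alignment
scene = global_aligner(output, device=device, mode=GlobalAlignerMode.PairViewer)

# the same accessors as above are available
focals = scene.get_focals()
poses = scene.get_im_poses()
pts3d = scene.get_pts3d()
confidence_masks = scene.get_masks()
```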

## BibTeX entry and citation info

```bibtex
@article{dust3r2023,
  title={{DUSt3R: Geometric 3D Vision Made Easy}},
  author={Wang, Shuzhe and Leroy, Vincent and Cabon, Yohann and Chidlovskii, Boris and Revaud, Jerome},
  journal={arXiv preprint arXiv:2312.14132},
  year={2023}
}
```