LanHarmony committed on
Commit
69af532
1 Parent(s): 2ce295b

introduce control net from diffusers

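For context, this commit drops the vendored ControlNet annotator and model code in favor of the diffusers implementation. A minimal sketch of the diffusers-based usage this migration targets (a sketch, not this repo's exact wiring; the checkpoint ids below are the stock public ones and may differ from what the Space actually loads):

    # Sketch: ControlNet through diffusers rather than the vendored ControlNet code.
    import torch
    from diffusers import ControlNetModel, StableDiffusionControlNetPipeline

    controlnet = ControlNetModel.from_pretrained(
        "lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16
    )
    pipe = StableDiffusionControlNetPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
    ).to("cuda")

    # canny_image is a PIL edge map used as the control signal.
    # result = pipe("a lakeside cabin", image=canny_image, num_inference_steps=20).images[0]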
This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. ControlNet/annotator/canny/__init__.py +0 -6
  2. ControlNet/annotator/ckpts/body_pose_model.pth +0 -3
  3. ControlNet/annotator/ckpts/ckpts.txt +0 -1
  4. ControlNet/annotator/ckpts/dpt_hybrid-midas-501f0c75.pt +0 -3
  5. ControlNet/annotator/ckpts/hand_pose_model.pth +0 -3
  6. ControlNet/annotator/ckpts/mlsd_large_512_fp32.pth +0 -3
  7. ControlNet/annotator/ckpts/network-bsds500.pth +0 -3
  8. ControlNet/annotator/ckpts/upernet_global_small.pth +0 -3
  9. ControlNet/annotator/hed/__init__.py +0 -132
  10. ControlNet/annotator/midas/__init__.py +0 -38
  11. ControlNet/annotator/midas/api.py +0 -169
  12. ControlNet/annotator/midas/midas/__init__.py +0 -0
  13. ControlNet/annotator/midas/midas/base_model.py +0 -16
  14. ControlNet/annotator/midas/midas/blocks.py +0 -342
  15. ControlNet/annotator/midas/midas/dpt_depth.py +0 -109
  16. ControlNet/annotator/midas/midas/midas_net.py +0 -76
  17. ControlNet/annotator/midas/midas/midas_net_custom.py +0 -128
  18. ControlNet/annotator/midas/midas/transforms.py +0 -234
  19. ControlNet/annotator/midas/midas/vit.py +0 -491
  20. ControlNet/annotator/midas/utils.py +0 -189
  21. ControlNet/annotator/mlsd/__init__.py +0 -39
  22. ControlNet/annotator/mlsd/models/mbv2_mlsd_large.py +0 -292
  23. ControlNet/annotator/mlsd/models/mbv2_mlsd_tiny.py +0 -275
  24. ControlNet/annotator/mlsd/utils.py +0 -580
  25. ControlNet/annotator/openpose/__init__.py +0 -44
  26. ControlNet/annotator/openpose/body.py +0 -219
  27. ControlNet/annotator/openpose/hand.py +0 -86
  28. ControlNet/annotator/openpose/model.py +0 -219
  29. ControlNet/annotator/openpose/util.py +0 -164
  30. ControlNet/annotator/uniformer/__init__.py +0 -23
  31. ControlNet/annotator/uniformer/configs/_base_/datasets/ade20k.py +0 -54
  32. ControlNet/annotator/uniformer/configs/_base_/datasets/chase_db1.py +0 -59
  33. ControlNet/annotator/uniformer/configs/_base_/datasets/cityscapes.py +0 -54
  34. ControlNet/annotator/uniformer/configs/_base_/datasets/cityscapes_769x769.py +0 -35
  35. ControlNet/annotator/uniformer/configs/_base_/datasets/drive.py +0 -59
  36. ControlNet/annotator/uniformer/configs/_base_/datasets/hrf.py +0 -59
  37. ControlNet/annotator/uniformer/configs/_base_/datasets/pascal_context.py +0 -60
  38. ControlNet/annotator/uniformer/configs/_base_/datasets/pascal_context_59.py +0 -60
  39. ControlNet/annotator/uniformer/configs/_base_/datasets/pascal_voc12.py +0 -57
  40. ControlNet/annotator/uniformer/configs/_base_/datasets/pascal_voc12_aug.py +0 -9
  41. ControlNet/annotator/uniformer/configs/_base_/datasets/stare.py +0 -59
  42. ControlNet/annotator/uniformer/configs/_base_/default_runtime.py +0 -14
  43. ControlNet/annotator/uniformer/configs/_base_/models/ann_r50-d8.py +0 -46
  44. ControlNet/annotator/uniformer/configs/_base_/models/apcnet_r50-d8.py +0 -44
  45. ControlNet/annotator/uniformer/configs/_base_/models/ccnet_r50-d8.py +0 -44
  46. ControlNet/annotator/uniformer/configs/_base_/models/cgnet.py +0 -35
  47. ControlNet/annotator/uniformer/configs/_base_/models/danet_r50-d8.py +0 -44
  48. ControlNet/annotator/uniformer/configs/_base_/models/deeplabv3_r50-d8.py +0 -44
  49. ControlNet/annotator/uniformer/configs/_base_/models/deeplabv3_unet_s5-d16.py +0 -50
  50. ControlNet/annotator/uniformer/configs/_base_/models/deeplabv3plus_r50-d8.py +0 -46
ControlNet/annotator/canny/__init__.py DELETED
@@ -1,6 +0,0 @@
- import cv2
-
-
- class CannyDetector:
-     def __call__(self, img, low_threshold, high_threshold):
-         return cv2.Canny(img, low_threshold, high_threshold)
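The deleted CannyDetector above is a one-line wrapper around OpenCV, so no replacement module is needed; a minimal equivalent (the threshold defaults here are illustrative):

    import cv2
    import numpy as np

    def canny_map(img: np.ndarray, low_threshold: int = 100, high_threshold: int = 200) -> np.ndarray:
        # img is an HxWx3 uint8 array; the result is a single-channel uint8 edge map.
        return cv2.Canny(img, low_threshold, high_threshold)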
ControlNet/annotator/ckpts/body_pose_model.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:25a948c16078b0f08e236bda51a385d855ef4c153598947c28c0d47ed94bb746
- size 209267595
ControlNet/annotator/ckpts/ckpts.txt DELETED
@@ -1 +0,0 @@
- Weights here.
ControlNet/annotator/ckpts/dpt_hybrid-midas-501f0c75.pt DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:501f0c75b3bca7daec6b3682c5054c09b366765aef6fa3a09d03a5cb4b230853
- size 492757791
ControlNet/annotator/ckpts/hand_pose_model.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:b76b00d1750901abd07b9f9d8c98cc3385b8fe834a26d4b4f0aad439e75fc600
- size 147341049
ControlNet/annotator/ckpts/mlsd_large_512_fp32.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:5696f168eb2c30d4374bbfd45436f7415bb4d88da29bea97eea0101520fba082
- size 6341481
ControlNet/annotator/ckpts/network-bsds500.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:58a858782f5fa3e0ca3dc92e7a1a609add93987d77be3dfa54f8f8419d881a94
- size 58871680
ControlNet/annotator/ckpts/upernet_global_small.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:bebfa1264c10381e389d8065056baaadbdadee8ddc6e36770d1ec339dc84d970
- size 206313115
ControlNet/annotator/hed/__init__.py DELETED
@@ -1,132 +0,0 @@
- import numpy as np
- import cv2
- import os
- import torch
- from einops import rearrange
- from annotator.util import annotator_ckpts_path
-
-
- class Network(torch.nn.Module):
-     def __init__(self, model_path):
-         super().__init__()
-
-         self.netVggOne = torch.nn.Sequential(
-             torch.nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False),
-             torch.nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False)
-         )
-
-         self.netVggTwo = torch.nn.Sequential(
-             torch.nn.MaxPool2d(kernel_size=2, stride=2),
-             torch.nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False),
-             torch.nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False)
-         )
-
-         self.netVggThr = torch.nn.Sequential(
-             torch.nn.MaxPool2d(kernel_size=2, stride=2),
-             torch.nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False),
-             torch.nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False),
-             torch.nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False)
-         )
-
-         self.netVggFou = torch.nn.Sequential(
-             torch.nn.MaxPool2d(kernel_size=2, stride=2),
-             torch.nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False),
-             torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False),
-             torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False)
-         )
-
-         self.netVggFiv = torch.nn.Sequential(
-             torch.nn.MaxPool2d(kernel_size=2, stride=2),
-             torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False),
-             torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False),
-             torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
-             torch.nn.ReLU(inplace=False)
-         )
-
-         self.netScoreOne = torch.nn.Conv2d(in_channels=64, out_channels=1, kernel_size=1, stride=1, padding=0)
-         self.netScoreTwo = torch.nn.Conv2d(in_channels=128, out_channels=1, kernel_size=1, stride=1, padding=0)
-         self.netScoreThr = torch.nn.Conv2d(in_channels=256, out_channels=1, kernel_size=1, stride=1, padding=0)
-         self.netScoreFou = torch.nn.Conv2d(in_channels=512, out_channels=1, kernel_size=1, stride=1, padding=0)
-         self.netScoreFiv = torch.nn.Conv2d(in_channels=512, out_channels=1, kernel_size=1, stride=1, padding=0)
-
-         self.netCombine = torch.nn.Sequential(
-             torch.nn.Conv2d(in_channels=5, out_channels=1, kernel_size=1, stride=1, padding=0),
-             torch.nn.Sigmoid()
-         )
-
-         self.load_state_dict({strKey.replace('module', 'net'): tenWeight for strKey, tenWeight in torch.load(model_path).items()})
-
-     def forward(self, tenInput):
-         tenInput = tenInput * 255.0
-         tenInput = tenInput - torch.tensor(data=[104.00698793, 116.66876762, 122.67891434], dtype=tenInput.dtype, device=tenInput.device).view(1, 3, 1, 1)
-
-         tenVggOne = self.netVggOne(tenInput)
-         tenVggTwo = self.netVggTwo(tenVggOne)
-         tenVggThr = self.netVggThr(tenVggTwo)
-         tenVggFou = self.netVggFou(tenVggThr)
-         tenVggFiv = self.netVggFiv(tenVggFou)
-
-         tenScoreOne = self.netScoreOne(tenVggOne)
-         tenScoreTwo = self.netScoreTwo(tenVggTwo)
-         tenScoreThr = self.netScoreThr(tenVggThr)
-         tenScoreFou = self.netScoreFou(tenVggFou)
-         tenScoreFiv = self.netScoreFiv(tenVggFiv)
-
-         tenScoreOne = torch.nn.functional.interpolate(input=tenScoreOne, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False)
-         tenScoreTwo = torch.nn.functional.interpolate(input=tenScoreTwo, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False)
-         tenScoreThr = torch.nn.functional.interpolate(input=tenScoreThr, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False)
-         tenScoreFou = torch.nn.functional.interpolate(input=tenScoreFou, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False)
-         tenScoreFiv = torch.nn.functional.interpolate(input=tenScoreFiv, size=(tenInput.shape[2], tenInput.shape[3]), mode='bilinear', align_corners=False)
-
-         return self.netCombine(torch.cat([ tenScoreOne, tenScoreTwo, tenScoreThr, tenScoreFou, tenScoreFiv ], 1))
-
-
- class HEDdetector:
-     def __init__(self):
-         remote_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/network-bsds500.pth"
-         modelpath = os.path.join(annotator_ckpts_path, "network-bsds500.pth")
-         if not os.path.exists(modelpath):
-             from basicsr.utils.download_util import load_file_from_url
-             load_file_from_url(remote_model_path, model_dir=annotator_ckpts_path)
-         self.netNetwork = Network(modelpath).cuda().eval()
-
-     def __call__(self, input_image):
-         assert input_image.ndim == 3
-         input_image = input_image[:, :, ::-1].copy()
-         with torch.no_grad():
-             image_hed = torch.from_numpy(input_image).float().cuda()
-             image_hed = image_hed / 255.0
-             image_hed = rearrange(image_hed, 'h w c -> 1 c h w')
-             edge = self.netNetwork(image_hed)[0]
-             edge = (edge.cpu().numpy() * 255.0).clip(0, 255).astype(np.uint8)
-             return edge[0]
-
-
- def nms(x, t, s):
-     x = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)
-
-     f1 = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8)
-     f2 = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8)
-     f3 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8)
-     f4 = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8)
-
-     y = np.zeros_like(x)
-
-     for f in [f1, f2, f3, f4]:
-         np.putmask(y, cv2.dilate(x, kernel=f) == x, x)
-
-     z = np.zeros_like(y, dtype=np.uint8)
-     z[y > t] = 255
-     return z
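With the vendored HED network deleted, the same annotator is available prepackaged in controlnet_aux (a sketch; it assumes the lllyasviel/Annotators weights repo that the package conventionally uses):

    # Sketch: HED edge annotation via controlnet_aux instead of the deleted Network class.
    from controlnet_aux import HEDdetector

    hed = HEDdetector.from_pretrained("lllyasviel/Annotators")
    # edge = hed(input_image)  # input_image: PIL image or HxWx3 uint8 array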
ControlNet/annotator/midas/__init__.py DELETED
@@ -1,38 +0,0 @@
- import cv2
- import numpy as np
- import torch
-
- from einops import rearrange
- from .api import MiDaSInference
-
-
- class MidasDetector:
-     def __init__(self):
-         self.model = MiDaSInference(model_type="dpt_hybrid").cuda()
-
-     def __call__(self, input_image, a=np.pi * 2.0, bg_th=0.1):
-         assert input_image.ndim == 3
-         image_depth = input_image
-         with torch.no_grad():
-             image_depth = torch.from_numpy(image_depth).float().cuda()
-             image_depth = image_depth / 127.5 - 1.0
-             image_depth = rearrange(image_depth, 'h w c -> 1 c h w')
-             depth = self.model(image_depth)[0]
-
-             depth_pt = depth.clone()
-             depth_pt -= torch.min(depth_pt)
-             depth_pt /= torch.max(depth_pt)
-             depth_pt = depth_pt.cpu().numpy()
-             depth_image = (depth_pt * 255.0).clip(0, 255).astype(np.uint8)
-
-             depth_np = depth.cpu().numpy()
-             x = cv2.Sobel(depth_np, cv2.CV_32F, 1, 0, ksize=3)
-             y = cv2.Sobel(depth_np, cv2.CV_32F, 0, 1, ksize=3)
-             z = np.ones_like(x) * a
-             x[depth_pt < bg_th] = 0
-             y[depth_pt < bg_th] = 0
-             normal = np.stack([x, y, z], axis=2)
-             normal /= np.sum(normal ** 2.0, axis=2, keepdims=True) ** 0.5
-             normal_image = (normal * 127.5 + 127.5).clip(0, 255).astype(np.uint8)
-
-         return depth_image, normal_image
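The deleted MidasDetector normalizes a MiDaS depth map to uint8 and derives a normal map from the depth gradients (Sobel x/y plus a constant z, then L2-normalized). controlnet_aux ships an equivalent depth annotator (a sketch; the weights repo id is the conventional one and is an assumption here):

    # Sketch: MiDaS depth annotation via controlnet_aux instead of the deleted class.
    from controlnet_aux import MidasDetector

    midas = MidasDetector.from_pretrained("lllyasviel/Annotators")
    # depth_image = midas(input_image)  # PIL image in, depth map image out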
ControlNet/annotator/midas/api.py DELETED
@@ -1,169 +0,0 @@
- # based on https://github.com/isl-org/MiDaS
-
- import cv2
- import os
- import torch
- import torch.nn as nn
- from torchvision.transforms import Compose
-
- from .midas.dpt_depth import DPTDepthModel
- from .midas.midas_net import MidasNet
- from .midas.midas_net_custom import MidasNet_small
- from .midas.transforms import Resize, NormalizeImage, PrepareForNet
- from annotator.util import annotator_ckpts_path
-
-
- ISL_PATHS = {
-     "dpt_large": os.path.join(annotator_ckpts_path, "dpt_large-midas-2f21e586.pt"),
-     "dpt_hybrid": os.path.join(annotator_ckpts_path, "dpt_hybrid-midas-501f0c75.pt"),
-     "midas_v21": "",
-     "midas_v21_small": "",
- }
-
- remote_model_path = "https://huggingface.co/lllyasviel/ControlNet/resolve/main/annotator/ckpts/dpt_hybrid-midas-501f0c75.pt"
-
-
- def disabled_train(self, mode=True):
-     """Overwrite model.train with this function to make sure train/eval mode
-     does not change anymore."""
-     return self
-
-
- def load_midas_transform(model_type):
-     # https://github.com/isl-org/MiDaS/blob/master/run.py
-     # load transform only
-     if model_type == "dpt_large":  # DPT-Large
-         net_w, net_h = 384, 384
-         resize_mode = "minimal"
-         normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
-
-     elif model_type == "dpt_hybrid":  # DPT-Hybrid
-         net_w, net_h = 384, 384
-         resize_mode = "minimal"
-         normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
-
-     elif model_type == "midas_v21":
-         net_w, net_h = 384, 384
-         resize_mode = "upper_bound"
-         normalization = NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
-
-     elif model_type == "midas_v21_small":
-         net_w, net_h = 256, 256
-         resize_mode = "upper_bound"
-         normalization = NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
-
-     else:
-         assert False, f"model_type '{model_type}' not implemented, use: --model_type large"
-
-     transform = Compose(
-         [
-             Resize(
-                 net_w,
-                 net_h,
-                 resize_target=None,
-                 keep_aspect_ratio=True,
-                 ensure_multiple_of=32,
-                 resize_method=resize_mode,
-                 image_interpolation_method=cv2.INTER_CUBIC,
-             ),
-             normalization,
-             PrepareForNet(),
-         ]
-     )
-
-     return transform
-
-
- def load_model(model_type):
-     # https://github.com/isl-org/MiDaS/blob/master/run.py
-     # load network
-     model_path = ISL_PATHS[model_type]
-     if model_type == "dpt_large":  # DPT-Large
-         model = DPTDepthModel(
-             path=model_path,
-             backbone="vitl16_384",
-             non_negative=True,
-         )
-         net_w, net_h = 384, 384
-         resize_mode = "minimal"
-         normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
-
-     elif model_type == "dpt_hybrid":  # DPT-Hybrid
-         if not os.path.exists(model_path):
-             from basicsr.utils.download_util import load_file_from_url
-             load_file_from_url(remote_model_path, model_dir=annotator_ckpts_path)
-
-         model = DPTDepthModel(
-             path=model_path,
-             backbone="vitb_rn50_384",
-             non_negative=True,
-         )
-         net_w, net_h = 384, 384
-         resize_mode = "minimal"
-         normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
-
-     elif model_type == "midas_v21":
-         model = MidasNet(model_path, non_negative=True)
-         net_w, net_h = 384, 384
-         resize_mode = "upper_bound"
-         normalization = NormalizeImage(
-             mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
-         )
-
-     elif model_type == "midas_v21_small":
-         model = MidasNet_small(model_path, features=64, backbone="efficientnet_lite3", exportable=True,
-                                non_negative=True, blocks={'expand': True})
-         net_w, net_h = 256, 256
-         resize_mode = "upper_bound"
-         normalization = NormalizeImage(
-             mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
-         )
-
-     else:
-         print(f"model_type '{model_type}' not implemented, use: --model_type large")
-         assert False
-
-     transform = Compose(
-         [
-             Resize(
-                 net_w,
-                 net_h,
-                 resize_target=None,
-                 keep_aspect_ratio=True,
-                 ensure_multiple_of=32,
-                 resize_method=resize_mode,
-                 image_interpolation_method=cv2.INTER_CUBIC,
-             ),
-             normalization,
-             PrepareForNet(),
-         ]
-     )
-
-     return model.eval(), transform
-
-
- class MiDaSInference(nn.Module):
-     MODEL_TYPES_TORCH_HUB = [
-         "DPT_Large",
-         "DPT_Hybrid",
-         "MiDaS_small"
-     ]
-     MODEL_TYPES_ISL = [
-         "dpt_large",
-         "dpt_hybrid",
-         "midas_v21",
-         "midas_v21_small",
-     ]
-
-     def __init__(self, model_type):
-         super().__init__()
-         assert (model_type in self.MODEL_TYPES_ISL)
-         model, _ = load_model(model_type)
-         self.model = model
-         self.model.train = disabled_train
-
-     def forward(self, x):
-         with torch.no_grad():
-             prediction = self.model(x)
-         return prediction
-
ControlNet/annotator/midas/midas/__init__.py DELETED
File without changes
ControlNet/annotator/midas/midas/base_model.py DELETED
@@ -1,16 +0,0 @@
- import torch
-
-
- class BaseModel(torch.nn.Module):
-     def load(self, path):
-         """Load model from file.
-
-         Args:
-             path (str): file path
-         """
-         parameters = torch.load(path, map_location=torch.device('cpu'))
-
-         if "optimizer" in parameters:
-             parameters = parameters["model"]
-
-         self.load_state_dict(parameters)
ControlNet/annotator/midas/midas/blocks.py DELETED
@@ -1,342 +0,0 @@
- import torch
- import torch.nn as nn
-
- from .vit import (
-     _make_pretrained_vitb_rn50_384,
-     _make_pretrained_vitl16_384,
-     _make_pretrained_vitb16_384,
-     forward_vit,
- )
-
- def _make_encoder(backbone, features, use_pretrained, groups=1, expand=False, exportable=True, hooks=None, use_vit_only=False, use_readout="ignore",):
-     if backbone == "vitl16_384":
-         pretrained = _make_pretrained_vitl16_384(
-             use_pretrained, hooks=hooks, use_readout=use_readout
-         )
-         scratch = _make_scratch(
-             [256, 512, 1024, 1024], features, groups=groups, expand=expand
-         )  # ViT-L/16 - 85.0% Top1 (backbone)
-     elif backbone == "vitb_rn50_384":
-         pretrained = _make_pretrained_vitb_rn50_384(
-             use_pretrained,
-             hooks=hooks,
-             use_vit_only=use_vit_only,
-             use_readout=use_readout,
-         )
-         scratch = _make_scratch(
-             [256, 512, 768, 768], features, groups=groups, expand=expand
-         )  # ViT-H/16 - 85.0% Top1 (backbone)
-     elif backbone == "vitb16_384":
-         pretrained = _make_pretrained_vitb16_384(
-             use_pretrained, hooks=hooks, use_readout=use_readout
-         )
-         scratch = _make_scratch(
-             [96, 192, 384, 768], features, groups=groups, expand=expand
-         )  # ViT-B/16 - 84.6% Top1 (backbone)
-     elif backbone == "resnext101_wsl":
-         pretrained = _make_pretrained_resnext101_wsl(use_pretrained)
-         scratch = _make_scratch([256, 512, 1024, 2048], features, groups=groups, expand=expand)  # efficientnet_lite3
-     elif backbone == "efficientnet_lite3":
-         pretrained = _make_pretrained_efficientnet_lite3(use_pretrained, exportable=exportable)
-         scratch = _make_scratch([32, 48, 136, 384], features, groups=groups, expand=expand)  # efficientnet_lite3
-     else:
-         print(f"Backbone '{backbone}' not implemented")
-         assert False
-
-     return pretrained, scratch
-
-
- def _make_scratch(in_shape, out_shape, groups=1, expand=False):
-     scratch = nn.Module()
-
-     out_shape1 = out_shape
-     out_shape2 = out_shape
-     out_shape3 = out_shape
-     out_shape4 = out_shape
-     if expand==True:
-         out_shape1 = out_shape
-         out_shape2 = out_shape*2
-         out_shape3 = out_shape*4
-         out_shape4 = out_shape*8
-
-     scratch.layer1_rn = nn.Conv2d(
-         in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
-     )
-     scratch.layer2_rn = nn.Conv2d(
-         in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
-     )
-     scratch.layer3_rn = nn.Conv2d(
-         in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
-     )
-     scratch.layer4_rn = nn.Conv2d(
-         in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups
-     )
-
-     return scratch
-
-
- def _make_pretrained_efficientnet_lite3(use_pretrained, exportable=False):
-     efficientnet = torch.hub.load(
-         "rwightman/gen-efficientnet-pytorch",
-         "tf_efficientnet_lite3",
-         pretrained=use_pretrained,
-         exportable=exportable
-     )
-     return _make_efficientnet_backbone(efficientnet)
-
-
- def _make_efficientnet_backbone(effnet):
-     pretrained = nn.Module()
-
-     pretrained.layer1 = nn.Sequential(
-         effnet.conv_stem, effnet.bn1, effnet.act1, *effnet.blocks[0:2]
-     )
-     pretrained.layer2 = nn.Sequential(*effnet.blocks[2:3])
-     pretrained.layer3 = nn.Sequential(*effnet.blocks[3:5])
-     pretrained.layer4 = nn.Sequential(*effnet.blocks[5:9])
-
-     return pretrained
-
-
- def _make_resnet_backbone(resnet):
-     pretrained = nn.Module()
-     pretrained.layer1 = nn.Sequential(
-         resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool, resnet.layer1
-     )
-
-     pretrained.layer2 = resnet.layer2
-     pretrained.layer3 = resnet.layer3
-     pretrained.layer4 = resnet.layer4
-
-     return pretrained
-
-
- def _make_pretrained_resnext101_wsl(use_pretrained):
-     resnet = torch.hub.load("facebookresearch/WSL-Images", "resnext101_32x8d_wsl")
-     return _make_resnet_backbone(resnet)
-
-
-
- class Interpolate(nn.Module):
-     """Interpolation module.
-     """
-
-     def __init__(self, scale_factor, mode, align_corners=False):
-         """Init.
-
-         Args:
-             scale_factor (float): scaling
-             mode (str): interpolation mode
-         """
-         super(Interpolate, self).__init__()
-
-         self.interp = nn.functional.interpolate
-         self.scale_factor = scale_factor
-         self.mode = mode
-         self.align_corners = align_corners
-
-     def forward(self, x):
-         """Forward pass.
-
-         Args:
-             x (tensor): input
-
-         Returns:
-             tensor: interpolated data
-         """
-
-         x = self.interp(
-             x, scale_factor=self.scale_factor, mode=self.mode, align_corners=self.align_corners
-         )
-
-         return x
-
-
- class ResidualConvUnit(nn.Module):
-     """Residual convolution module.
-     """
-
-     def __init__(self, features):
-         """Init.
-
-         Args:
-             features (int): number of features
-         """
-         super().__init__()
-
-         self.conv1 = nn.Conv2d(
-             features, features, kernel_size=3, stride=1, padding=1, bias=True
-         )
-
-         self.conv2 = nn.Conv2d(
-             features, features, kernel_size=3, stride=1, padding=1, bias=True
-         )
-
-         self.relu = nn.ReLU(inplace=True)
-
-     def forward(self, x):
-         """Forward pass.
-
-         Args:
-             x (tensor): input
-
-         Returns:
-             tensor: output
-         """
-         out = self.relu(x)
-         out = self.conv1(out)
-         out = self.relu(out)
-         out = self.conv2(out)
-
-         return out + x
-
-
- class FeatureFusionBlock(nn.Module):
-     """Feature fusion block.
-     """
-
-     def __init__(self, features):
-         """Init.
-
-         Args:
-             features (int): number of features
-         """
-         super(FeatureFusionBlock, self).__init__()
-
-         self.resConfUnit1 = ResidualConvUnit(features)
-         self.resConfUnit2 = ResidualConvUnit(features)
-
-     def forward(self, *xs):
-         """Forward pass.
-
-         Returns:
-             tensor: output
-         """
-         output = xs[0]
-
-         if len(xs) == 2:
-             output += self.resConfUnit1(xs[1])
-
-         output = self.resConfUnit2(output)
-
-         output = nn.functional.interpolate(
-             output, scale_factor=2, mode="bilinear", align_corners=True
-         )
-
-         return output
-
-
-
-
- class ResidualConvUnit_custom(nn.Module):
-     """Residual convolution module.
-     """
-
-     def __init__(self, features, activation, bn):
-         """Init.
-
-         Args:
-             features (int): number of features
-         """
-         super().__init__()
-
-         self.bn = bn
-
-         self.groups=1
-
-         self.conv1 = nn.Conv2d(
-             features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups
-         )
-
-         self.conv2 = nn.Conv2d(
-             features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups
-         )
-
-         if self.bn==True:
-             self.bn1 = nn.BatchNorm2d(features)
-             self.bn2 = nn.BatchNorm2d(features)
-
-         self.activation = activation
-
-         self.skip_add = nn.quantized.FloatFunctional()
-
-     def forward(self, x):
-         """Forward pass.
-
-         Args:
-             x (tensor): input
-
-         Returns:
-             tensor: output
-         """
-
-         out = self.activation(x)
-         out = self.conv1(out)
-         if self.bn==True:
-             out = self.bn1(out)
-
-         out = self.activation(out)
-         out = self.conv2(out)
-         if self.bn==True:
-             out = self.bn2(out)
-
-         if self.groups > 1:
-             out = self.conv_merge(out)
-
-         return self.skip_add.add(out, x)
-
-         # return out + x
-
-
- class FeatureFusionBlock_custom(nn.Module):
-     """Feature fusion block.
-     """
-
-     def __init__(self, features, activation, deconv=False, bn=False, expand=False, align_corners=True):
-         """Init.
-
-         Args:
-             features (int): number of features
-         """
-         super(FeatureFusionBlock_custom, self).__init__()
-
-         self.deconv = deconv
-         self.align_corners = align_corners
-
-         self.groups=1
-
-         self.expand = expand
-         out_features = features
-         if self.expand==True:
-             out_features = features//2
-
-         self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1)
-
-         self.resConfUnit1 = ResidualConvUnit_custom(features, activation, bn)
-         self.resConfUnit2 = ResidualConvUnit_custom(features, activation, bn)
-
-         self.skip_add = nn.quantized.FloatFunctional()
-
-     def forward(self, *xs):
-         """Forward pass.
-
-         Returns:
-             tensor: output
-         """
-         output = xs[0]
-
-         if len(xs) == 2:
-             res = self.resConfUnit1(xs[1])
-             output = self.skip_add.add(output, res)
-             # output += res
-
-         output = self.resConfUnit2(output)
-
-         output = nn.functional.interpolate(
-             output, scale_factor=2, mode="bilinear", align_corners=self.align_corners
-         )
-
-         output = self.out_conv(output)
-
-         return output
-
ControlNet/annotator/midas/midas/dpt_depth.py DELETED
@@ -1,109 +0,0 @@
- import torch
- import torch.nn as nn
- import torch.nn.functional as F
-
- from .base_model import BaseModel
- from .blocks import (
-     FeatureFusionBlock,
-     FeatureFusionBlock_custom,
-     Interpolate,
-     _make_encoder,
-     forward_vit,
- )
-
-
- def _make_fusion_block(features, use_bn):
-     return FeatureFusionBlock_custom(
-         features,
-         nn.ReLU(False),
-         deconv=False,
-         bn=use_bn,
-         expand=False,
-         align_corners=True,
-     )
-
-
- class DPT(BaseModel):
-     def __init__(
-         self,
-         head,
-         features=256,
-         backbone="vitb_rn50_384",
-         readout="project",
-         channels_last=False,
-         use_bn=False,
-     ):
-
-         super(DPT, self).__init__()
-
-         self.channels_last = channels_last
-
-         hooks = {
-             "vitb_rn50_384": [0, 1, 8, 11],
-             "vitb16_384": [2, 5, 8, 11],
-             "vitl16_384": [5, 11, 17, 23],
-         }
-
-         # Instantiate backbone and reassemble blocks
-         self.pretrained, self.scratch = _make_encoder(
-             backbone,
-             features,
-             False,  # Set to true of you want to train from scratch, uses ImageNet weights
-             groups=1,
-             expand=False,
-             exportable=False,
-             hooks=hooks[backbone],
-             use_readout=readout,
-         )
-
-         self.scratch.refinenet1 = _make_fusion_block(features, use_bn)
-         self.scratch.refinenet2 = _make_fusion_block(features, use_bn)
-         self.scratch.refinenet3 = _make_fusion_block(features, use_bn)
-         self.scratch.refinenet4 = _make_fusion_block(features, use_bn)
-
-         self.scratch.output_conv = head
-
-
-     def forward(self, x):
-         if self.channels_last == True:
-             x.contiguous(memory_format=torch.channels_last)
-
-         layer_1, layer_2, layer_3, layer_4 = forward_vit(self.pretrained, x)
-
-         layer_1_rn = self.scratch.layer1_rn(layer_1)
-         layer_2_rn = self.scratch.layer2_rn(layer_2)
-         layer_3_rn = self.scratch.layer3_rn(layer_3)
-         layer_4_rn = self.scratch.layer4_rn(layer_4)
-
-         path_4 = self.scratch.refinenet4(layer_4_rn)
-         path_3 = self.scratch.refinenet3(path_4, layer_3_rn)
-         path_2 = self.scratch.refinenet2(path_3, layer_2_rn)
-         path_1 = self.scratch.refinenet1(path_2, layer_1_rn)
-
-         out = self.scratch.output_conv(path_1)
-
-         return out
-
-
- class DPTDepthModel(DPT):
-     def __init__(self, path=None, non_negative=True, **kwargs):
-         features = kwargs["features"] if "features" in kwargs else 256
-
-         head = nn.Sequential(
-             nn.Conv2d(features, features // 2, kernel_size=3, stride=1, padding=1),
-             Interpolate(scale_factor=2, mode="bilinear", align_corners=True),
-             nn.Conv2d(features // 2, 32, kernel_size=3, stride=1, padding=1),
-             nn.ReLU(True),
-             nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0),
-             nn.ReLU(True) if non_negative else nn.Identity(),
-             nn.Identity(),
-         )
-
-         super().__init__(head, **kwargs)
-
-         if path is not None:
-             self.load(path)
-
-     def forward(self, x):
-         return super().forward(x).squeeze(dim=1)
-
ControlNet/annotator/midas/midas/midas_net.py DELETED
@@ -1,76 +0,0 @@
- """MidashNet: Network for monocular depth estimation trained by mixing several datasets.
- This file contains code that is adapted from
- https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py
- """
- import torch
- import torch.nn as nn
-
- from .base_model import BaseModel
- from .blocks import FeatureFusionBlock, Interpolate, _make_encoder
-
-
- class MidasNet(BaseModel):
-     """Network for monocular depth estimation.
-     """
-
-     def __init__(self, path=None, features=256, non_negative=True):
-         """Init.
-
-         Args:
-             path (str, optional): Path to saved model. Defaults to None.
-             features (int, optional): Number of features. Defaults to 256.
-             backbone (str, optional): Backbone network for encoder. Defaults to resnet50
-         """
-         print("Loading weights: ", path)
-
-         super(MidasNet, self).__init__()
-
-         use_pretrained = False if path is None else True
-
-         self.pretrained, self.scratch = _make_encoder(backbone="resnext101_wsl", features=features, use_pretrained=use_pretrained)
-
-         self.scratch.refinenet4 = FeatureFusionBlock(features)
-         self.scratch.refinenet3 = FeatureFusionBlock(features)
-         self.scratch.refinenet2 = FeatureFusionBlock(features)
-         self.scratch.refinenet1 = FeatureFusionBlock(features)
-
-         self.scratch.output_conv = nn.Sequential(
-             nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1),
-             Interpolate(scale_factor=2, mode="bilinear"),
-             nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1),
-             nn.ReLU(True),
-             nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0),
-             nn.ReLU(True) if non_negative else nn.Identity(),
-         )
-
-         if path:
-             self.load(path)
-
-     def forward(self, x):
-         """Forward pass.
-
-         Args:
-             x (tensor): input data (image)
-
-         Returns:
-             tensor: depth
-         """
-
-         layer_1 = self.pretrained.layer1(x)
-         layer_2 = self.pretrained.layer2(layer_1)
-         layer_3 = self.pretrained.layer3(layer_2)
-         layer_4 = self.pretrained.layer4(layer_3)
-
-         layer_1_rn = self.scratch.layer1_rn(layer_1)
-         layer_2_rn = self.scratch.layer2_rn(layer_2)
-         layer_3_rn = self.scratch.layer3_rn(layer_3)
-         layer_4_rn = self.scratch.layer4_rn(layer_4)
-
-         path_4 = self.scratch.refinenet4(layer_4_rn)
-         path_3 = self.scratch.refinenet3(path_4, layer_3_rn)
-         path_2 = self.scratch.refinenet2(path_3, layer_2_rn)
-         path_1 = self.scratch.refinenet1(path_2, layer_1_rn)
-
-         out = self.scratch.output_conv(path_1)
-
-         return torch.squeeze(out, dim=1)
ControlNet/annotator/midas/midas/midas_net_custom.py DELETED
@@ -1,128 +0,0 @@
- """MidashNet: Network for monocular depth estimation trained by mixing several datasets.
- This file contains code that is adapted from
- https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py
- """
- import torch
- import torch.nn as nn
-
- from .base_model import BaseModel
- from .blocks import FeatureFusionBlock, FeatureFusionBlock_custom, Interpolate, _make_encoder
-
-
- class MidasNet_small(BaseModel):
-     """Network for monocular depth estimation.
-     """
-
-     def __init__(self, path=None, features=64, backbone="efficientnet_lite3", non_negative=True, exportable=True, channels_last=False, align_corners=True,
-                  blocks={'expand': True}):
-         """Init.
-
-         Args:
-             path (str, optional): Path to saved model. Defaults to None.
-             features (int, optional): Number of features. Defaults to 256.
-             backbone (str, optional): Backbone network for encoder. Defaults to resnet50
-         """
-         print("Loading weights: ", path)
-
-         super(MidasNet_small, self).__init__()
-
-         use_pretrained = False if path else True
-
-         self.channels_last = channels_last
-         self.blocks = blocks
-         self.backbone = backbone
-
-         self.groups = 1
-
-         features1=features
-         features2=features
-         features3=features
-         features4=features
-         self.expand = False
-         if "expand" in self.blocks and self.blocks['expand'] == True:
-             self.expand = True
-             features1=features
-             features2=features*2
-             features3=features*4
-             features4=features*8
-
-         self.pretrained, self.scratch = _make_encoder(self.backbone, features, use_pretrained, groups=self.groups, expand=self.expand, exportable=exportable)
-
-         self.scratch.activation = nn.ReLU(False)
-
-         self.scratch.refinenet4 = FeatureFusionBlock_custom(features4, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners)
-         self.scratch.refinenet3 = FeatureFusionBlock_custom(features3, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners)
-         self.scratch.refinenet2 = FeatureFusionBlock_custom(features2, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners)
-         self.scratch.refinenet1 = FeatureFusionBlock_custom(features1, self.scratch.activation, deconv=False, bn=False, align_corners=align_corners)
-
-
-         self.scratch.output_conv = nn.Sequential(
-             nn.Conv2d(features, features//2, kernel_size=3, stride=1, padding=1, groups=self.groups),
-             Interpolate(scale_factor=2, mode="bilinear"),
-             nn.Conv2d(features//2, 32, kernel_size=3, stride=1, padding=1),
-             self.scratch.activation,
-             nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0),
-             nn.ReLU(True) if non_negative else nn.Identity(),
-             nn.Identity(),
-         )
-
-         if path:
-             self.load(path)
-
-
-     def forward(self, x):
-         """Forward pass.
-
-         Args:
-             x (tensor): input data (image)
-
-         Returns:
-             tensor: depth
-         """
-         if self.channels_last==True:
-             print("self.channels_last = ", self.channels_last)
-             x.contiguous(memory_format=torch.channels_last)
-
-
-         layer_1 = self.pretrained.layer1(x)
-         layer_2 = self.pretrained.layer2(layer_1)
-         layer_3 = self.pretrained.layer3(layer_2)
-         layer_4 = self.pretrained.layer4(layer_3)
-
-         layer_1_rn = self.scratch.layer1_rn(layer_1)
-         layer_2_rn = self.scratch.layer2_rn(layer_2)
-         layer_3_rn = self.scratch.layer3_rn(layer_3)
-         layer_4_rn = self.scratch.layer4_rn(layer_4)
-
-
-         path_4 = self.scratch.refinenet4(layer_4_rn)
-         path_3 = self.scratch.refinenet3(path_4, layer_3_rn)
-         path_2 = self.scratch.refinenet2(path_3, layer_2_rn)
-         path_1 = self.scratch.refinenet1(path_2, layer_1_rn)
-
-         out = self.scratch.output_conv(path_1)
-
-         return torch.squeeze(out, dim=1)
-
-
-
- def fuse_model(m):
-     prev_previous_type = nn.Identity()
-     prev_previous_name = ''
-     previous_type = nn.Identity()
-     previous_name = ''
-     for name, module in m.named_modules():
-         if prev_previous_type == nn.Conv2d and previous_type == nn.BatchNorm2d and type(module) == nn.ReLU:
-             # print("FUSED ", prev_previous_name, previous_name, name)
-             torch.quantization.fuse_modules(m, [prev_previous_name, previous_name, name], inplace=True)
-         elif prev_previous_type == nn.Conv2d and previous_type == nn.BatchNorm2d:
-             # print("FUSED ", prev_previous_name, previous_name)
-             torch.quantization.fuse_modules(m, [prev_previous_name, previous_name], inplace=True)
-         # elif previous_type == nn.Conv2d and type(module) == nn.ReLU:
-         #    print("FUSED ", previous_name, name)
-         #    torch.quantization.fuse_modules(m, [previous_name, name], inplace=True)
-
-         prev_previous_type = previous_type
-         prev_previous_name = previous_name
-         previous_type = type(module)
-         previous_name = name
ControlNet/annotator/midas/midas/transforms.py DELETED
@@ -1,234 +0,0 @@
- import numpy as np
- import cv2
- import math
-
-
- def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
-     """Rezise the sample to ensure the given size. Keeps aspect ratio.
-
-     Args:
-         sample (dict): sample
-         size (tuple): image size
-
-     Returns:
-         tuple: new size
-     """
-     shape = list(sample["disparity"].shape)
-
-     if shape[0] >= size[0] and shape[1] >= size[1]:
-         return sample
-
-     scale = [0, 0]
-     scale[0] = size[0] / shape[0]
-     scale[1] = size[1] / shape[1]
-
-     scale = max(scale)
-
-     shape[0] = math.ceil(scale * shape[0])
-     shape[1] = math.ceil(scale * shape[1])
-
-     # resize
-     sample["image"] = cv2.resize(
-         sample["image"], tuple(shape[::-1]), interpolation=image_interpolation_method
-     )
-
-     sample["disparity"] = cv2.resize(
-         sample["disparity"], tuple(shape[::-1]), interpolation=cv2.INTER_NEAREST
-     )
-     sample["mask"] = cv2.resize(
-         sample["mask"].astype(np.float32),
-         tuple(shape[::-1]),
-         interpolation=cv2.INTER_NEAREST,
-     )
-     sample["mask"] = sample["mask"].astype(bool)
-
-     return tuple(shape)
-
-
- class Resize(object):
-     """Resize sample to given size (width, height).
-     """
-
-     def __init__(
-         self,
-         width,
-         height,
-         resize_target=True,
-         keep_aspect_ratio=False,
-         ensure_multiple_of=1,
-         resize_method="lower_bound",
-         image_interpolation_method=cv2.INTER_AREA,
-     ):
-         """Init.
-
-         Args:
-             width (int): desired output width
-             height (int): desired output height
-             resize_target (bool, optional):
-                 True: Resize the full sample (image, mask, target).
-                 False: Resize image only.
-                 Defaults to True.
-             keep_aspect_ratio (bool, optional):
-                 True: Keep the aspect ratio of the input sample.
-                 Output sample might not have the given width and height, and
-                 resize behaviour depends on the parameter 'resize_method'.
-                 Defaults to False.
-             ensure_multiple_of (int, optional):
-                 Output width and height is constrained to be multiple of this parameter.
-                 Defaults to 1.
-             resize_method (str, optional):
-                 "lower_bound": Output will be at least as large as the given size.
-                 "upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.)
-                 "minimal": Scale as least as possible. (Output size might be smaller than given size.)
-                 Defaults to "lower_bound".
-         """
-         self.__width = width
-         self.__height = height
-
-         self.__resize_target = resize_target
-         self.__keep_aspect_ratio = keep_aspect_ratio
-         self.__multiple_of = ensure_multiple_of
-         self.__resize_method = resize_method
-         self.__image_interpolation_method = image_interpolation_method
-
-     def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
-         y = (np.round(x / self.__multiple_of) * self.__multiple_of).astype(int)
-
-         if max_val is not None and y > max_val:
-             y = (np.floor(x / self.__multiple_of) * self.__multiple_of).astype(int)
-
-         if y < min_val:
-             y = (np.ceil(x / self.__multiple_of) * self.__multiple_of).astype(int)
-
-         return y
-
-     def get_size(self, width, height):
-         # determine new height and width
-         scale_height = self.__height / height
-         scale_width = self.__width / width
-
-         if self.__keep_aspect_ratio:
-             if self.__resize_method == "lower_bound":
-                 # scale such that output size is lower bound
-                 if scale_width > scale_height:
-                     # fit width
-                     scale_height = scale_width
-                 else:
-                     # fit height
-                     scale_width = scale_height
-             elif self.__resize_method == "upper_bound":
-                 # scale such that output size is upper bound
-                 if scale_width < scale_height:
-                     # fit width
-                     scale_height = scale_width
-                 else:
-                     # fit height
-                     scale_width = scale_height
-             elif self.__resize_method == "minimal":
-                 # scale as least as possbile
-                 if abs(1 - scale_width) < abs(1 - scale_height):
-                     # fit width
-                     scale_height = scale_width
-                 else:
-                     # fit height
-                     scale_width = scale_height
-             else:
-                 raise ValueError(
-                     f"resize_method {self.__resize_method} not implemented"
-                 )
-
-         if self.__resize_method == "lower_bound":
-             new_height = self.constrain_to_multiple_of(
-                 scale_height * height, min_val=self.__height
-             )
-             new_width = self.constrain_to_multiple_of(
-                 scale_width * width, min_val=self.__width
-             )
-         elif self.__resize_method == "upper_bound":
-             new_height = self.constrain_to_multiple_of(
-                 scale_height * height, max_val=self.__height
-             )
-             new_width = self.constrain_to_multiple_of(
-                 scale_width * width, max_val=self.__width
-             )
-         elif self.__resize_method == "minimal":
-             new_height = self.constrain_to_multiple_of(scale_height * height)
-             new_width = self.constrain_to_multiple_of(scale_width * width)
-         else:
-             raise ValueError(f"resize_method {self.__resize_method} not implemented")
-
-         return (new_width, new_height)
-
-     def __call__(self, sample):
-         width, height = self.get_size(
-             sample["image"].shape[1], sample["image"].shape[0]
-         )
-
-         # resize sample
-         sample["image"] = cv2.resize(
-             sample["image"],
-             (width, height),
-             interpolation=self.__image_interpolation_method,
-         )
-
-         if self.__resize_target:
-             if "disparity" in sample:
-                 sample["disparity"] = cv2.resize(
-                     sample["disparity"],
-                     (width, height),
-                     interpolation=cv2.INTER_NEAREST,
-                 )
-
-             if "depth" in sample:
-                 sample["depth"] = cv2.resize(
-                     sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST
-                 )
-
-             sample["mask"] = cv2.resize(
-                 sample["mask"].astype(np.float32),
-                 (width, height),
-                 interpolation=cv2.INTER_NEAREST,
-             )
-             sample["mask"] = sample["mask"].astype(bool)
-
-         return sample
-
-
- class NormalizeImage(object):
-     """Normlize image by given mean and std.
-     """
-
-     def __init__(self, mean, std):
-         self.__mean = mean
-         self.__std = std
-
-     def __call__(self, sample):
-         sample["image"] = (sample["image"] - self.__mean) / self.__std
-
-         return sample
-
-
- class PrepareForNet(object):
-     """Prepare sample for usage as network input.
-     """
-
-     def __init__(self):
-         pass
-
-     def __call__(self, sample):
-         image = np.transpose(sample["image"], (2, 0, 1))
-         sample["image"] = np.ascontiguousarray(image).astype(np.float32)
-
-         if "mask" in sample:
-             sample["mask"] = sample["mask"].astype(np.float32)
-             sample["mask"] = np.ascontiguousarray(sample["mask"])
-
-         if "disparity" in sample:
-             disparity = sample["disparity"].astype(np.float32)
-             sample["disparity"] = np.ascontiguousarray(disparity)
-
-         if "depth" in sample:
-             depth = sample["depth"].astype(np.float32)
-             sample["depth"] = np.ascontiguousarray(depth)
-
-         return sample
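To make the deleted Resize semantics concrete, a small worked check under illustrative numbers (target 384x384, keep_aspect_ratio=True, resize_method="minimal", ensure_multiple_of=32):

    # For a 640x480 input, "minimal" keeps whichever scale is closest to 1.
    scale_w, scale_h = 384 / 640, 384 / 480                              # 0.6, 0.8
    scale = scale_h if abs(1 - scale_h) < abs(1 - scale_w) else scale_w  # -> 0.8
    new_w = round(scale * 640 / 32) * 32                                 # 512
    new_h = round(scale * 480 / 32) * 32                                 # 384
    assert (new_w, new_h) == (512, 384)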
ControlNet/annotator/midas/midas/vit.py DELETED
@@ -1,491 +0,0 @@
- import torch
- import torch.nn as nn
- import timm
- import types
- import math
- import torch.nn.functional as F
-
-
- class Slice(nn.Module):
-     def __init__(self, start_index=1):
-         super(Slice, self).__init__()
-         self.start_index = start_index
-
-     def forward(self, x):
-         return x[:, self.start_index :]
-
-
- class AddReadout(nn.Module):
-     def __init__(self, start_index=1):
-         super(AddReadout, self).__init__()
-         self.start_index = start_index
-
-     def forward(self, x):
-         if self.start_index == 2:
-             readout = (x[:, 0] + x[:, 1]) / 2
-         else:
-             readout = x[:, 0]
-         return x[:, self.start_index :] + readout.unsqueeze(1)
-
-
- class ProjectReadout(nn.Module):
-     def __init__(self, in_features, start_index=1):
-         super(ProjectReadout, self).__init__()
-         self.start_index = start_index
-
-         self.project = nn.Sequential(nn.Linear(2 * in_features, in_features), nn.GELU())
-
-     def forward(self, x):
-         readout = x[:, 0].unsqueeze(1).expand_as(x[:, self.start_index :])
-         features = torch.cat((x[:, self.start_index :], readout), -1)
-
-         return self.project(features)
-
-
- class Transpose(nn.Module):
-     def __init__(self, dim0, dim1):
-         super(Transpose, self).__init__()
-         self.dim0 = dim0
-         self.dim1 = dim1
-
-     def forward(self, x):
-         x = x.transpose(self.dim0, self.dim1)
-         return x
-
-
- def forward_vit(pretrained, x):
-     b, c, h, w = x.shape
-
-     glob = pretrained.model.forward_flex(x)
-
-     layer_1 = pretrained.activations["1"]
-     layer_2 = pretrained.activations["2"]
-     layer_3 = pretrained.activations["3"]
-     layer_4 = pretrained.activations["4"]
-
-     layer_1 = pretrained.act_postprocess1[0:2](layer_1)
-     layer_2 = pretrained.act_postprocess2[0:2](layer_2)
-     layer_3 = pretrained.act_postprocess3[0:2](layer_3)
-     layer_4 = pretrained.act_postprocess4[0:2](layer_4)
-
-     unflatten = nn.Sequential(
-         nn.Unflatten(
-             2,
-             torch.Size(
-                 [
-                     h // pretrained.model.patch_size[1],
-                     w // pretrained.model.patch_size[0],
-                 ]
-             ),
-         )
-     )
-
-     if layer_1.ndim == 3:
-         layer_1 = unflatten(layer_1)
-     if layer_2.ndim == 3:
-         layer_2 = unflatten(layer_2)
-     if layer_3.ndim == 3:
-         layer_3 = unflatten(layer_3)
-     if layer_4.ndim == 3:
-         layer_4 = unflatten(layer_4)
-
-     layer_1 = pretrained.act_postprocess1[3 : len(pretrained.act_postprocess1)](layer_1)
-     layer_2 = pretrained.act_postprocess2[3 : len(pretrained.act_postprocess2)](layer_2)
-     layer_3 = pretrained.act_postprocess3[3 : len(pretrained.act_postprocess3)](layer_3)
-     layer_4 = pretrained.act_postprocess4[3 : len(pretrained.act_postprocess4)](layer_4)
-
-     return layer_1, layer_2, layer_3, layer_4
-
-
- def _resize_pos_embed(self, posemb, gs_h, gs_w):
-     posemb_tok, posemb_grid = (
-         posemb[:, : self.start_index],
-         posemb[0, self.start_index :],
-     )
-
-     gs_old = int(math.sqrt(len(posemb_grid)))
-
-     posemb_grid = posemb_grid.reshape(1, gs_old, gs_old, -1).permute(0, 3, 1, 2)
-     posemb_grid = F.interpolate(posemb_grid, size=(gs_h, gs_w), mode="bilinear")
-     posemb_grid = posemb_grid.permute(0, 2, 3, 1).reshape(1, gs_h * gs_w, -1)
-
-     posemb = torch.cat([posemb_tok, posemb_grid], dim=1)
-
-     return posemb
-
-
- def forward_flex(self, x):
-     b, c, h, w = x.shape
-
-     pos_embed = self._resize_pos_embed(
-         self.pos_embed, h // self.patch_size[1], w // self.patch_size[0]
-     )
-
-     B = x.shape[0]
-
-     if hasattr(self.patch_embed, "backbone"):
-         x = self.patch_embed.backbone(x)
-         if isinstance(x, (list, tuple)):
-             x = x[-1]  # last feature if backbone outputs list/tuple of features
-
-     x = self.patch_embed.proj(x).flatten(2).transpose(1, 2)
-
-     if getattr(self, "dist_token", None) is not None:
-         cls_tokens = self.cls_token.expand(
-             B, -1, -1
-         )  # stole cls_tokens impl from Phil Wang, thanks
-         dist_token = self.dist_token.expand(B, -1, -1)
-         x = torch.cat((cls_tokens, dist_token, x), dim=1)
-     else:
-         cls_tokens = self.cls_token.expand(
-             B, -1, -1
-         )  # stole cls_tokens impl from Phil Wang, thanks
-         x = torch.cat((cls_tokens, x), dim=1)
-
-     x = x + pos_embed
-     x = self.pos_drop(x)
-
-     for blk in self.blocks:
-         x = blk(x)
-
-     x = self.norm(x)
-
-     return x
-
-
- activations = {}
-
-
- def get_activation(name):

(diff truncated in this view; see the raw diff for the remaining lines of this file)