freealise committed
Commit 00e81be
1 Parent(s): ac1033d

Update app.py

Files changed (1)
  1. app.py +83 -52
app.py CHANGED
@@ -17,11 +17,27 @@ from vincenty import vincenty
 import json
 #import DracoPy
 from collections import Counter
-
-from depth_anything.dpt import DepthAnything
-from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet
 import mediapy
 
+#from depth_anything.dpt import DepthAnything
+#from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet
+from huggingface_hub import hf_hub_download
+from depth_anything_v2.dpt import DepthAnythingV2
+
+DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+model_configs = {
+    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
+    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
+    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
+    'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
+}
+encoder2name = {
+    'vits': 'Small',
+    'vitb': 'Base',
+    'vitl': 'Large',
+    'vitg': 'Giant', # we are undergoing company review procedures to release our giant model checkpoint
+}
+
 edge = []
 gradient = None
 params = { "fnum":0, "l":16 }
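For orientation: the new `model_configs` table holds the DPT decoder hyper-parameters per ViT backbone, while `encoder2name` maps the same keys to the checkpoint repo names on the Hub. A minimal sketch of how one key resolves (repo and file names exactly as used later in this commit):

```python
# Sketch: resolving an encoder key to its Hub checkpoint, per the tables above.
encoder = "vits"
repo_id = f"depth-anything/Depth-Anything-V2-{encoder2name[encoder]}"  # -> depth-anything/Depth-Anything-V2-Small
filename = f"depth_anything_v2_{encoder}.pth"                          # -> depth_anything_v2_vits.pth
```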
@@ -54,43 +70,53 @@ def create_video(frames, fps, type):
     return type + "_result.mp4"
 
 @torch.no_grad()
-def predict_depth(model, image):
-    return model(image)["depth"]
-
 #@spaces.GPU
+def predict_depth(image, model):
+    return model.infer_image(image)
+
+#def predict_depth(model, image):
+#    return model(image)["depth"]
+
 def make_video(video_path, outdir='./vis_video_depth', encoder='vits', remove_bg=False):
     if encoder not in ["vitl","vitb","vits"]:
         encoder = "vits"
 
-    mapper = {"vits":"small","vitb":"base","vitl":"large"}
+    model_name = encoder2name[encoder]
+    model = DepthAnythingV2(**model_configs[encoder])
+    filepath = hf_hub_download(repo_id=f"depth-anything/Depth-Anything-V2-{model_name}", filename=f"depth_anything_v2_{encoder}.pth", repo_type="model")
+    state_dict = torch.load(filepath, map_location="cpu")
+    model.load_state_dict(state_dict)
+    model = model.to(DEVICE).eval()
+
+    #mapper = {"vits":"small","vitb":"base","vitl":"large"}
     # DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
     # model = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(DEVICE).eval()
     # Define path for temporary processed frames
-    temp_frame_dir = tempfile.mkdtemp()
+    #temp_frame_dir = tempfile.mkdtemp()
 
-    margin_width = 50
-    to_tensor_transform = transforms.ToTensor()
+    #margin_width = 50
+    #to_tensor_transform = transforms.ToTensor()
 
-    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+    #DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
     # depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_{}14'.format(encoder)).to(DEVICE).eval()
-    depth_anything = pipeline(task = "depth-estimation", model=f"nielsr/depth-anything-{mapper[encoder]}")
+    #depth_anything = pipeline(task = "depth-estimation", model=f"nielsr/depth-anything-{mapper[encoder]}")
 
     # total_params = sum(param.numel() for param in depth_anything.parameters())
     # print('Total parameters: {:.2f}M'.format(total_params / 1e6))
 
-    transform = Compose([
-        Resize(
-            width=518,
-            height=518,
-            resize_target=False,
-            keep_aspect_ratio=True,
-            ensure_multiple_of=14,
-            resize_method='lower_bound',
-            image_interpolation_method=cv2.INTER_CUBIC,
-        ),
-        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-        PrepareForNet(),
-    ])
+    #transform = Compose([
+    #    Resize(
+    #        width=518,
+    #        height=518,
+    #        resize_target=False,
+    #        keep_aspect_ratio=True,
+    #        ensure_multiple_of=14,
+    #        resize_method='lower_bound',
+    #        image_interpolation_method=cv2.INTER_CUBIC,
+    #    ),
+    #    NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    #    PrepareForNet(),
+    #])
 
     if os.path.isfile(video_path):
         if video_path.endswith('txt'):
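Taken together, this hunk swaps the V1 `transformers` pipeline for an explicit checkpoint download plus `infer_image`. A self-contained sketch of the same loading path outside the app (a rough sketch: it assumes the `depth_anything_v2` package from the Depth-Anything-V2 repo is on the import path, and `frame.png` is a hypothetical stand-in input):

```python
import cv2
import torch
from huggingface_hub import hf_hub_download
from depth_anything_v2.dpt import DepthAnythingV2

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Build the small model with the config this commit uses for 'vits'.
model = DepthAnythingV2(encoder='vits', features=64, out_channels=[48, 96, 192, 384])
filepath = hf_hub_download(repo_id="depth-anything/Depth-Anything-V2-Small",
                           filename="depth_anything_v2_vits.pth", repo_type="model")
model.load_state_dict(torch.load(filepath, map_location="cpu"))
model = model.to(DEVICE).eval()

raw_frame = cv2.imread("frame.png")  # BGR uint8, as cv2.VideoCapture also yields
with torch.no_grad():
    depth = model.infer_image(raw_frame[:, :, ::-1])  # same channel flip as in make_video()
print(depth.shape, float(depth.min()), float(depth.max()))  # float HxW relative depth map
```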
@@ -144,19 +170,23 @@ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', remove_bg
 
             frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2RGB) / 255.0
             frame_pil = Image.fromarray((frame * 255).astype(np.uint8))
-            frame = transform({'image': frame})['image']
-
-            frame = torch.from_numpy(frame).unsqueeze(0).to(DEVICE)
-
-
-            depth = to_tensor_transform(predict_depth(depth_anything, frame_pil))
+            #frame = transform({'image': frame})['image']
+            #frame = torch.from_numpy(frame).unsqueeze(0).to(DEVICE)
 
-            depth = F.interpolate(depth[None], (frame_height, frame_width), mode='bilinear', align_corners=False)[0, 0]
+            #
+            depth = predict_depth(raw_frame[:, :, ::-1], model)
             depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
-
-            depth = depth.cpu().numpy().astype(np.uint8)
-            depth_color = cv2.applyColorMap(depth, cv2.COLORMAP_BONE)
+            depth = depth.astype(np.uint8)
+            depth_color = Image.fromarray(depth)
             depth_gray = cv2.cvtColor(depth_color, cv2.COLOR_RGBA2GRAY)
+            #
+
+            #depth = to_tensor_transform(predict_depth(depth_anything, frame_pil))
+            #depth = F.interpolate(depth[None], (frame_height, frame_width), mode='bilinear', align_corners=False)[0, 0]
+            #depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
+            #depth = depth.cpu().numpy().astype(np.uint8)
+            #depth_color = cv2.applyColorMap(depth, cv2.COLORMAP_BONE)
+            #depth_gray = cv2.cvtColor(depth_color, cv2.COLOR_RGBA2GRAY)
 
             # Remove white border around map:
             # define lower and upper limits of white
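Two notes on the new per-frame depth path: the min-max scaling is recomputed per frame, so absolute brightness can flicker between frames; and `cv2.cvtColor` expects a NumPy array, so passing it the PIL `Image` built on the previous line (a single-channel one, at that) would likely raise an error as written. A hedged sketch of one way to derive a grayscale array, a 3-channel copy, and a PIL handle from the same map (variable names here are illustrative, not from the commit):

```python
import numpy as np
import cv2
from PIL import Image

# Stand-in for the float HxW map returned by predict_depth() for one frame:
depth = np.random.rand(480, 640).astype(np.float32)

depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0  # per-frame scaling
depth = depth.astype(np.uint8)

depth_gray = depth                                   # single channel is already grayscale
depth_bgr = cv2.cvtColor(depth, cv2.COLOR_GRAY2BGR)  # 3-channel copy for OpenCV drawing
depth_pil = Image.fromarray(depth)                   # PIL handle, if needed downstream
```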
@@ -896,23 +926,24 @@ css = """
 }
 """
 
-title = "# Depth Anything Video Demo"
-description = """Depth Anything on full video files.
-Please refer to our [paper](https://arxiv.org/abs/2401.10891), [project page](https://depth-anything.github.io), or [github](https://github.com/LiheYoung/Depth-Anything) for more details."""
-
-transform = Compose([
-    Resize(
-        width=518,
-        height=518,
-        resize_target=False,
-        keep_aspect_ratio=True,
-        ensure_multiple_of=14,
-        resize_method='lower_bound',
-        image_interpolation_method=cv2.INTER_CUBIC,
-    ),
-    NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-    PrepareForNet(),
-])
+title = "# Depth Anything V2 Video"
+description = """**Depth Anything V2** on full video files.
+Please refer to our [paper](https://arxiv.org/abs/2406.09414), [project page](https://depth-anything-v2.github.io), and [github](https://github.com/DepthAnything/Depth-Anything-V2) for more details."""
+
+
+#transform = Compose([
+#    Resize(
+#        width=518,
+#        height=518,
+#        resize_target=False,
+#        keep_aspect_ratio=True,
+#        ensure_multiple_of=14,
+#        resize_method='lower_bound',
+#        image_interpolation_method=cv2.INTER_CUBIC,
+#    ),
+#    NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+#    PrepareForNet(),
+#])
 
 # @torch.no_grad()
 # def predict_depth(model, image):
 