JohanDL committed on
Commit
545659d
1 Parent(s): 02c2cf8

Initial commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.png filter=lfs diff=lfs merge=lfs -text
37
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
38
+ *.pdf filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import cv2
3
+ import numpy as np
4
+ import os
5
+ import torch
6
+ import torch.nn.functional as F
7
+ from torchvision.transforms import Compose
8
+ import tempfile
9
+
10
+ from depth_anything.dpt import DepthAnything
11
+ from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet
12
+
13
def make_video(video_path, outdir='./vis_video_depth', encoder='vitl'):
    """Render side-by-side (input | depth map) videos with Depth Anything.

    Args:
        video_path: Path to a single video file, to a file ending in ``txt``
            that lists one video path per line, or to a directory whose
            non-hidden entries are all treated as videos.
        outdir: Unused. Kept only so existing callers keep working; the
            result is written to a temporary ``.mp4`` file instead.
        encoder: Encoder tag substituted into the checkpoint name
            ``LiheYoung/depth_anything_{encoder}14``.

    Returns:
        Path of the temporary ``.mp4`` written for the last processed input,
        or ``None`` when there was nothing to process. The temporary file is
        not deleted by this function.
    """
    margin_width = 50

    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

    # NOTE(review): the checkpoint is loaded on every call; consider caching
    # it if this function is invoked repeatedly.
    depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_{}14'.format(encoder)).to(DEVICE).eval()

    total_params = sum(param.numel() for param in depth_anything.parameters())
    print('Total parameters: {:.2f}M'.format(total_params / 1e6))

    transform = Compose([
        Resize(
            width=518,
            height=518,
            resize_target=False,
            keep_aspect_ratio=True,
            ensure_multiple_of=14,
            resize_method='lower_bound',
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
    ])

    if os.path.isfile(video_path):
        if video_path.endswith('txt'):
            # The listing must populate `filenames`; blank lines are skipped
            # so they are not handed to VideoCapture as empty paths.
            with open(video_path, 'r') as f:
                filenames = [line.strip() for line in f.read().splitlines() if line.strip()]
        else:
            filenames = [video_path]
    else:
        filenames = os.listdir(video_path)
        filenames = [os.path.join(video_path, filename) for filename in filenames if not filename.startswith('.')]
        filenames.sort()

    output_path = None

    for k, filename in enumerate(filenames):
        print('Progress {:}/{:},'.format(k+1, len(filenames)), 'Processing', filename)

        raw_video = cv2.VideoCapture(filename)
        frame_width, frame_height = int(raw_video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(raw_video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep the fractional part (e.g. 29.97 fps); truncating to int would
        # change the duration of the rendered video.
        frame_rate = raw_video.get(cv2.CAP_PROP_FPS)
        output_width = frame_width * 2 + margin_width

        # Reserve a unique file name; the handle is closed right away so the
        # video writer can open the path itself.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmpfile:
            output_path = tmpfile.name
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, frame_rate, (output_width, frame_height))

        try:
            while raw_video.isOpened():
                ret, raw_frame = raw_video.read()
                if not ret:
                    break

                frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2RGB) / 255.0

                frame = transform({'image': frame})['image']
                frame = torch.from_numpy(frame).unsqueeze(0).to(DEVICE)

                with torch.no_grad():
                    depth = depth_anything(frame)

                depth = F.interpolate(depth[None], (frame_height, frame_width), mode='bilinear', align_corners=False)[0, 0]

                # Scale to 0..255. A constant depth map has zero range, which
                # would otherwise produce NaNs before the uint8 cast.
                depth_min = depth.min()
                depth_range = depth.max() - depth_min
                if depth_range > 0:
                    depth = (depth - depth_min) / depth_range * 255.0
                else:
                    depth = torch.zeros_like(depth)

                depth = depth.cpu().numpy().astype(np.uint8)
                depth_color = cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)

                # White vertical strip separating the input from the depth map.
                split_region = np.ones((frame_height, margin_width, 3), dtype=np.uint8) * 255
                combined_frame = cv2.hconcat([raw_frame, split_region, depth_color])

                out.write(combined_frame)
        finally:
            # Release the capture and finalize the output file even when
            # inference fails part-way through a video.
            raw_video.release()
            out.release()

    return output_path
100
+
101
+ css = """
102
+ #img-display-container {
103
+ max-height: 100vh;
104
+ }
105
+ #img-display-input {
106
+ max-height: 80vh;
107
+ }
108
+ #img-display-output {
109
+ max-height: 80vh;
110
+ }
111
+ """
112
+ DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
113
+ model = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(DEVICE).eval()
114
+
115
+ title = "# Depth Anything Video Demo"
116
+ description = """Depth Anything on full video files.
117
+
118
+ Please refer to our [paper](https://arxiv.org/abs/2401.10891), [project page](https://depth-anything.github.io), or [github](https://github.com/LiheYoung/Depth-Anything) for more details."""
119
+
120
+ transform = Compose([
121
+ Resize(
122
+ width=518,
123
+ height=518,
124
+ resize_target=False,
125
+ keep_aspect_ratio=True,
126
+ ensure_multiple_of=14,
127
+ resize_method='lower_bound',
128
+ image_interpolation_method=cv2.INTER_CUBIC,
129
+ ),
130
+ NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
131
+ PrepareForNet(),
132
+ ])
133
+
134
def predict_depth(model, image):
    """Call ``model`` on ``image`` with gradient tracking disabled.

    Args:
        model: Callable that takes ``image`` and returns the prediction.
        image: Input forwarded unchanged to ``model``.

    Returns:
        Whatever ``model(image)`` returns.
    """
    with torch.no_grad():
        prediction = model(image)
    return prediction
137
+
138
# Gradio UI: upload a video, run depth estimation on it, show the result.
with gr.Blocks(css=css) as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    gr.Markdown("### Video Depth Prediction demo")

    # NOTE(review): assumes only the input video lives inside the row and the
    # button/output sit below it; confirm against the intended layout.
    with gr.Row():
        input_video = gr.Video(label="Input Video")
    submit = gr.Button("Submit")
    processed_video = gr.Video(label="Processed Video")

    def on_submit(uploaded_video):
        """Run depth estimation on the uploaded video.

        Args:
            uploaded_video: Value supplied by the ``input_video`` component;
                empty when nothing has been uploaded.

        Returns:
            Path of the rendered output video, as returned by ``make_video``.

        Raises:
            gr.Error: If no video was provided.
        """
        # Without this guard an empty submission reaches os.path.isfile(None)
        # inside make_video and fails with an opaque TypeError.
        if not uploaded_video:
            raise gr.Error("Please upload a video before submitting.")

        # Process the video and get the path of the output video
        output_video_path = make_video(uploaded_video)

        return output_video_path

    submit.click(on_submit, inputs=[input_video], outputs=processed_video)

    # Build the example list defensively: a missing directory should not
    # prevent the app from starting, and hidden files are not videos.
    example_dir = 'assets/examples_video'
    example_files = os.listdir(example_dir) if os.path.isdir(example_dir) else []
    example_files = [filename for filename in example_files if not filename.startswith('.')]
    example_files.sort()
    example_files = [os.path.join('assets/examples_video', filename) for filename in example_files]
    examples = None
    if example_files:
        examples = gr.Examples(examples=example_files, inputs=[input_video], outputs=processed_video, fn=on_submit, cache_examples=False)


if __name__ == '__main__':
    demo.queue().launch()
assets/controlnet_demo1.png ADDED

Git LFS Details

  • SHA256: c1108a8f624dba50b6400b39a3709c6d92c30ab874cdd9b9d7f0538243efdc6a
  • Pointer size: 132 Bytes
  • Size of remote file: 4.21 MB
assets/controlnet_demo2.png ADDED

Git LFS Details

  • SHA256: 9b335afcf4e835d1ffaedf539fb4add4bd08bd591468c63e57f92c7c0c830c62
  • Pointer size: 132 Bytes
  • Size of remote file: 4.11 MB
assets/examples/demo1.png ADDED

Git LFS Details

  • SHA256: 90f9d82d98ef71d8f54dbb9eb680cc98f40869428442c3f64d8eb4bced15c4bd
  • Pointer size: 131 Bytes
  • Size of remote file: 884 kB
assets/examples/demo10.png ADDED

Git LFS Details

  • SHA256: f08cc7df399c682af247f7b4d11957ed8e18349e27de7bee09fa026ceb4ca46a
  • Pointer size: 132 Bytes
  • Size of remote file: 4.05 MB
assets/examples/demo11.png ADDED

Git LFS Details

  • SHA256: 8c0faac2d617c958efeb21a21acba6746e245d187f2d27ed7b66969302201086
  • Pointer size: 132 Bytes
  • Size of remote file: 4.62 MB
assets/examples/demo12.png ADDED

Git LFS Details

  • SHA256: 7b16d0b55d3d7a9d0b94f904e32c34f93daebdb224375b578082a8e3b047fcef
  • Pointer size: 132 Bytes
  • Size of remote file: 4.6 MB
assets/examples/demo13.png ADDED

Git LFS Details

  • SHA256: 490b50afa12b16b6200ff0d9ae4b61b55a59601bbc3f125dfdfb09629a389fa5
  • Pointer size: 132 Bytes
  • Size of remote file: 6.19 MB
assets/examples/demo14.png ADDED

Git LFS Details

  • SHA256: 725a2f05bcc492c99be5f1ae47f21b1d5053cf0629b7858352ceb3eec80ef49b
  • Pointer size: 132 Bytes
  • Size of remote file: 6.62 MB
assets/examples/demo15.png ADDED

Git LFS Details

  • SHA256: 39e941a6a9079f3f9a9278b67edfefdf3375e0782f5eddbc0373e9889fba4fba
  • Pointer size: 132 Bytes
  • Size of remote file: 8.13 MB
assets/examples/demo16.png ADDED

Git LFS Details

  • SHA256: b6555edaaff8ed240b242d4d9775de364ddaafb7e2e4241e8db1120b863b8dba
  • Pointer size: 132 Bytes
  • Size of remote file: 5.6 MB
assets/examples/demo17.png ADDED

Git LFS Details

  • SHA256: 83f65587f843355759a216577776a2a8f3540bfda7f5c73753e57e9c7ccd59a6
  • Pointer size: 132 Bytes
  • Size of remote file: 2.82 MB
assets/examples/demo18.png ADDED

Git LFS Details

  • SHA256: 96a2c3ef316d940b076aaf230783afffa3921def745bf0f20cbd85f6c0cd5689
  • Pointer size: 132 Bytes
  • Size of remote file: 6.69 MB
assets/examples/demo19.png ADDED

Git LFS Details

  • SHA256: 06569e1790b161446b01aaadf609db8fb66593c487969adb79068d8df1c92364
  • Pointer size: 132 Bytes
  • Size of remote file: 6.3 MB
assets/examples/demo2.png ADDED

Git LFS Details

  • SHA256: b2ac0ec64c4d274dd94af5956dfc14eff636af6b424a238d8942d928965c8c59
  • Pointer size: 132 Bytes
  • Size of remote file: 1.3 MB
assets/examples/demo20.png ADDED

Git LFS Details

  • SHA256: 819440ec686e94a11697bb80f6585a7a52fdf216f6125a99f2555ceee684039b
  • Pointer size: 132 Bytes
  • Size of remote file: 5.99 MB
assets/examples/demo3.png ADDED

Git LFS Details

  • SHA256: 6ccf6333cff5c2f72ba038c1ba426671d405707b27207d369b5bfc3a61083d41
  • Pointer size: 132 Bytes
  • Size of remote file: 1.05 MB
assets/examples/demo4.png ADDED

Git LFS Details

  • SHA256: 2bfdb6c82cefab57447683f8c1e0def9a7bd1357c7c72266d6bc7c78cfceb2e1
  • Pointer size: 132 Bytes
  • Size of remote file: 4.96 MB
assets/examples/demo5.png ADDED

Git LFS Details

  • SHA256: 9f684d3797148b005e8219f0979d066e8629fe8ffe982e06e6548fa0d8f00973
  • Pointer size: 132 Bytes
  • Size of remote file: 5.22 MB
assets/examples/demo7.png ADDED

Git LFS Details

  • SHA256: e0dd6854153a5c7589503fd681568b21fd4c41b4e1e64ed7e687542bf8de2ed4
  • Pointer size: 132 Bytes
  • Size of remote file: 4.7 MB
assets/examples/demo8.png ADDED

Git LFS Details

  • SHA256: a23dec9695cff9b57e4f4d5c119fad8887a6e7caad99de207d823e5278498fcd
  • Pointer size: 132 Bytes
  • Size of remote file: 3.94 MB
assets/examples/demo9.png ADDED

Git LFS Details

  • SHA256: 5d886953fac1f826e812cf62f0cd173a406c5d4ca8b8d9298ccad20dcf283833
  • Pointer size: 132 Bytes
  • Size of remote file: 8.76 MB
assets/examples_video/davis_dolphins.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da2bdf883de86f3ad1f7ec58e34f50cd8dc1bbde8288e23a635a7396ba1af13d
3
+ size 468524
assets/examples_video/davis_rollercoaster.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:291361c800b83ead49f50302ffc82f6ecd5205391934cc8354946b4b93e8cbb4
3
+ size 596021
assets/examples_video/davis_seasnake.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:247f92487bc7a14bf2364847f83e23c7c99addf28abaa043bb353edb6531cead
3
+ size 4010306
assets/paper.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e554e412ffc6e9e6edddc963baa2211692860ab0efa92d74bf7d09b18d2c597
3
+ size 4549655
assets/teaser.png ADDED

Git LFS Details

  • SHA256: 27556951def59ec85d5a0675752d0b525f69a499fe466dbe64f5fd62e3380b7f
  • Pointer size: 132 Bytes
  • Size of remote file: 5.66 MB
assets/video_edit/demo1_midas.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11c668214e74ff067cadd9f5beaa8b103360398f5c689c4a4db6c74b451a6963
3
+ size 187513
assets/video_edit/demo1_ours.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57e88c7610076ee422f53c112907339d5a87338cc9186453eb5bfaf6ed3a9257
3
+ size 431370
assets/video_edit/demo1_video.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfcb14cb7cec14c6b8198a6a455609dc5fac26c42628867e0d4412d53bfa0af7
3
+ size 174791
assets/video_edit/demo2_midas.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:833fd0e7b41f712073ff48373139b1433a0e61b0221a44971a25d57b34a92078
3
+ size 145623
assets/video_edit/demo2_ours.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c49d36aab8eeac806613a80c870fa6d3b62694ad08da0550f2e7d6d1b29553fd
3
+ size 222693
assets/video_edit/demo2_video.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb31e593a2a0973e37a5e004a25e7b5fde0e6e9234e283cc404ae7b9805b45ce
3
+ size 112277
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio_imageslider
2
+ gradio==4.14.0
3
+ torch
4
+ torchvision
5
+ opencv-python
6
+ huggingface_hub