init space
- .gitattributes +1 -0
- .gitignore +3 -0
- app.py +66 -0
- examples/image_a/01.jpg +3 -0
- examples/image_b/01.jpg +3 -0
- interpolator.py +142 -0
- requirements.txt +5 -0
- utils.py +20 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.jpg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,3 @@
*.mp4
.idea/
__pycache__/
app.py ADDED
@@ -0,0 +1,66 @@
import mediapy
import gradio as gr
from utils import load_image
from interpolator import Interpolator, interpolate_recursively

path = "./smooth.mp4"

interpolator = Interpolator()


def predict(image_a, image_b):
    image1 = load_image(image_a)
    image2 = load_image(image_b)
    input_frames = [image1, image2]
    frames = list(interpolate_recursively(input_frames, interpolator))
    mediapy.write_video(path, frames, fps=30)
    return path


footer = r"""
<center>
<b>
Demo for <a href='https://tfhub.dev/google/film/1'>FILM</a> frame interpolation
</b>
</center>
"""

coffee = r"""
<center>
<a href="https://www.buymeacoffee.com/leonelhs"><img src="https://img.buymeacoffee.com/button-api/?text=Buy me a
coffee&emoji=&slug=leonelhs&button_colour=FFDD00&font_colour=000000&font_family=Cookie&outline_colour=000000
&coffee_colour=ffffff" /></a>
</center>
"""

with gr.Blocks(title="FILM") as app:
    gr.HTML("<center><h1>Frame interpolation using FILM</h1></center>")
    gr.HTML("<center><h3>Synthesizes the in-between frames for two input images and "
            "renders them as a smooth video.</h3></center>")
    with gr.Row(equal_height=False):
        with gr.Column():
            input_img_a = gr.Image(type="filepath", label="Input image A")
            input_img_b = gr.Image(type="filepath", label="Input image B")
            run_btn = gr.Button(variant="primary")
        with gr.Column():
            output_img = gr.Video(format="mp4", label="Interpolated video")
            gr.ClearButton(components=[input_img_a, input_img_b, output_img], variant="stop")

    run_btn.click(predict, [input_img_a, input_img_b], [output_img])

    with gr.Row():
        blobs_a = [[f"examples/image_a/{x:02d}.jpg"] for x in range(1, 2)]
        examples_a = gr.Dataset(components=[input_img_a], samples=blobs_a)
        examples_a.click(lambda x: x[0], [examples_a], [input_img_a])
    with gr.Row():
        blobs_b = [[f"examples/image_b/{x:02d}.jpg"] for x in range(1, 2)]
        examples_b = gr.Dataset(components=[input_img_b], samples=blobs_b)
        examples_b.click(lambda x: x[0], [examples_b], [input_img_b])

    with gr.Row():
        gr.HTML(footer)
    with gr.Row():
        gr.HTML(coffee)

# Queuing must be enabled before launch (launch with debug=True blocks, so a
# later app.queue() call would never run); interpolation can take minutes.
app.queue()
app.launch(share=False, debug=True, show_error=True)
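As a quick smoke test outside the Gradio UI, the body of predict() can be run directly. A minimal sketch, assuming the bundled example images are present and using a shallower recursion depth than the app's default of 6 so it finishes quickly:

import mediapy
from utils import load_image
from interpolator import Interpolator, interpolate_recursively

# 2 recursion levels turn one image pair into 2**2 + 1 = 5 frames.
interp = Interpolator(times_to_interpolate=2)
frames = list(interpolate_recursively(
    [load_image("examples/image_a/01.jpg"), load_image("examples/image_b/01.jpg")],
    interp,
))
mediapy.write_video("./preview.mp4", frames, fps=30)
print(f"Wrote {len(frames)} frames")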
examples/image_a/01.jpg ADDED (binary image, stored with Git LFS)
examples/image_b/01.jpg ADDED (binary image, stored with Git LFS)
interpolator.py ADDED
@@ -0,0 +1,142 @@
"""A wrapper class for running frame interpolation based on the FILM model on TFHub.

Usage:
  interpolator = Interpolator()
  result_batch = interpolator(image_batch_0, image_batch_1, batch_dt)
where image_batch_0 and image_batch_1 are numpy tensors with TF standard
(B, H, W, C) layout, and batch_dt is the sub-frame time in range [0..1], (B,) layout.
"""
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
from typing import Generator, Iterable, List


def _pad_to_align(x, align):
    """Pads image batch x so width and height divide by align.

    Args:
      x: Image batch to align.
      align: Number to align to.

    Returns:
      1) An image padded so width % align == 0 and height % align == 0.
      2) A bounding box that can be fed readily to tf.image.crop_to_bounding_box
         to undo the padding.
    """
    # Input checking.
    assert np.ndim(x) == 4
    assert align > 0, 'align must be a positive number.'

    height, width = x.shape[-3:-1]
    height_to_pad = (align - height % align) if height % align != 0 else 0
    width_to_pad = (align - width % align) if width % align != 0 else 0

    bbox_to_pad = {
        'offset_height': height_to_pad // 2,
        'offset_width': width_to_pad // 2,
        'target_height': height + height_to_pad,
        'target_width': width + width_to_pad
    }
    padded_x = tf.image.pad_to_bounding_box(x, **bbox_to_pad)
    bbox_to_crop = {
        'offset_height': height_to_pad // 2,
        'offset_width': width_to_pad // 2,
        'target_height': height,
        'target_width': width
    }
    return padded_x, bbox_to_crop


class Interpolator:
    """A class for generating interpolated frames between two input frames.

    Uses the FILM model from TFHub.
    """

    def __init__(self, times_to_interpolate: int = 6, align: int = 64) -> None:
        """Loads the FILM saved model from TFHub.

        Args:
          times_to_interpolate: Number of recursive midpoint interpolations to
            run between each pair of input frames.
          align: If >1, pad the input size so it divides by this before
            inference.
        """
        self.times_to_interpolate = times_to_interpolate
        self._model = hub.load("https://tfhub.dev/google/film/1")
        self._align = align

    def __call__(self, x0: np.ndarray, x1: np.ndarray,
                 dt: np.ndarray) -> np.ndarray:
        """Generates an interpolated frame between the given two batches of frames.

        All inputs should be np.float32 datatype.

        Args:
          x0: First image batch. Dimensions: (batch_size, height, width, channels)
          x1: Second image batch. Dimensions: (batch_size, height, width, channels)
          dt: Sub-frame time. Range [0, 1]. Dimensions: (batch_size,)

        Returns:
          The result with dimensions (batch_size, height, width, channels).
        """
        if self._align is not None:
            x0, bbox_to_crop = _pad_to_align(x0, self._align)
            x1, _ = _pad_to_align(x1, self._align)

        inputs = {'x0': x0, 'x1': x1, 'time': dt[..., np.newaxis]}
        result = self._model(inputs, training=False)
        image = result['image']

        if self._align is not None:
            image = tf.image.crop_to_bounding_box(image, **bbox_to_crop)
        return image.numpy()


def _recursive_generator(
        frame1: np.ndarray, frame2: np.ndarray, num_recursions: int,
        interpolator: Interpolator) -> Generator[np.ndarray, None, None]:
    """Splits halfway to repeatedly generate more frames.

    Args:
      frame1: Input image 1.
      frame2: Input image 2.
      num_recursions: How many times to interpolate the consecutive image pairs.
      interpolator: The frame interpolator instance.

    Yields:
      The interpolated frames, including the first frame (frame1), but excluding
      the final frame2.
    """
    if num_recursions == 0:
        yield frame1
    else:
        # Add the batch dimension to all inputs before calling the interpolator,
        # and remove it afterwards.
        time = np.full(shape=(1,), fill_value=0.5, dtype=np.float32)
        mid_frame = interpolator(np.expand_dims(frame1, axis=0), np.expand_dims(frame2, axis=0), time)[0]
        yield from _recursive_generator(frame1, mid_frame, num_recursions - 1, interpolator)
        yield from _recursive_generator(mid_frame, frame2, num_recursions - 1, interpolator)


def interpolate_recursively(
        frames: List[np.ndarray], interpolator: Interpolator) -> Iterable[np.ndarray]:
    """Generates interpolated frames by repeatedly interpolating the midpoint.

    The recursion depth is taken from interpolator.times_to_interpolate.

    Args:
      frames: List of input frames. Expected shape (H, W, 3). The colors should
        be in the range [0, 1] and in gamma space.
      interpolator: The frame interpolation model to use.

    Yields:
      The interpolated frames (including the inputs).
    """
    times_to_interpolate = interpolator.times_to_interpolate

    n = len(frames)
    for i in range(1, n):
        yield from _recursive_generator(frames[i - 1], frames[i], times_to_interpolate, interpolator)
    # Separately yield the final frame.
    yield frames[-1]
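A sketch of the batched __call__ interface from the module docstring. The random arrays are placeholders to show shapes only; real inputs should be float32 images in [0, 1]. A 250x250 input is not a multiple of align=64, so it is padded to 256x256 for inference and cropped back afterwards. Note also the frame-count arithmetic: with the default times_to_interpolate=6, interpolate_recursively expands a single image pair into 2^6 + 1 = 65 frames, roughly two seconds of video at 30 fps.

import numpy as np
from interpolator import Interpolator

interp = Interpolator(times_to_interpolate=1, align=64)

# Placeholder inputs with TF-standard (B, H, W, C) layout.
x0 = np.random.rand(1, 250, 250, 3).astype(np.float32)
x1 = np.random.rand(1, 250, 250, 3).astype(np.float32)
dt = np.full((1,), 0.5, dtype=np.float32)  # the temporal midpoint

mid = interp(x0, x1, dt)
print(mid.shape)  # (1, 250, 250, 3) -- cropped back to the input size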
requirements.txt ADDED
@@ -0,0 +1,5 @@
tensorflow>=2.15.0
tensorflow_hub>=0.15.0  # required by interpolator.py
requests>=2.31.0
numpy>=1.23.5
mediapy>=1.2.0
utils.py ADDED
@@ -0,0 +1,20 @@
import numpy as np
import requests
import tensorflow as tf

_UINT8_MAX_F = float(np.iinfo(np.uint8).max)


def load_image(img_url: str):
    """Returns an image with shape [height, width, num_channels], with pixels in [0..1] range, and type np.float32."""

    if img_url.startswith(("http://", "https://")):
        user_agent = {'User-agent': 'Colab Sample (https://tensorflow.org)'}
        response = requests.get(img_url, headers=user_agent)
        image_data = response.content
    else:
        image_data = tf.io.read_file(img_url)

    image = tf.io.decode_image(image_data, channels=3)
    image_numpy = tf.cast(image, dtype=tf.float32).numpy()
    # Scale uint8 pixel values [0, 255] into the [0, 1] float range the model expects.
    return image_numpy / _UINT8_MAX_F
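A short usage sketch, assuming the repository's example image is available locally; an http(s) URL would take the requests branch instead:

from utils import load_image

img = load_image("examples/image_a/01.jpg")
print(img.dtype, img.shape, img.min(), img.max())  # float32, (H, W, 3), values in [0, 1]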