feat: basic face recognition UI
Browse files- app.py +101 -21
- data/images/{c2NhbGUoKQ.jpg → 35639830294_a77085ffde_b.jpg} +2 -2
- data/images/813-nostril-bear_14999516389_o.jpg +3 -0
- ui.py +186 -0
app.py
CHANGED
@@ -10,29 +10,36 @@ import numpy as np
|
|
10 |
from PIL import Image
|
11 |
from ultralytics import YOLO
|
12 |
|
13 |
-
from
|
14 |
-
|
15 |
-
crop_from_yolov8,
|
16 |
-
get_best_device,
|
17 |
-
load_models,
|
18 |
-
resize,
|
19 |
-
run_pipeline,
|
20 |
-
setup,
|
21 |
-
square_pad,
|
22 |
-
)
|
23 |
|
24 |
|
25 |
-
def prediction_to_str(
|
26 |
"""
|
27 |
-
Turn the
|
28 |
"""
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
|
33 |
def interface_fn(
|
34 |
loaded_models: dict[str, Any], pil_image: Image.Image
|
35 |
-
) -> Tuple[Image.Image, Image.Image, str]:
|
36 |
"""
|
37 |
Main interface function that runs the model on the provided pil_image and
|
38 |
returns the exepected tuple to populate the gradio interface.
|
@@ -47,21 +54,80 @@ def interface_fn(
|
|
47 |
model.
|
48 |
"""
|
49 |
PARAM_SQUARE_DIM = 300
|
|
|
|
|
50 |
result = run_pipeline(
|
51 |
loaded_models=loaded_models,
|
52 |
pil_image=pil_image,
|
53 |
param_square_dim=PARAM_SQUARE_DIM,
|
54 |
-
param_k=
|
55 |
-
param_n_samples_per_individual=
|
56 |
knn_index_filepath=METRIC_LEARNING_KNN_INDEX_FILEPATH,
|
57 |
)
|
58 |
pil_image_segmented_head = result["stages"]["segmentation"]["output"]["pil_image"]
|
59 |
pil_image_cropped_head = result["stages"]["crop"]["output"]["pil_images"]["resized"]
|
60 |
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
-
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
|
67 |
def examples(dir_examples: Path) -> list[Path]:
|
@@ -111,6 +177,13 @@ with gr.Blocks() as demo:
|
|
111 |
)
|
112 |
output_segmentation_image = gr.Image(type="pil", label="model prediction")
|
113 |
output_cropped_image = gr.Image(type="pil", label="cropped bear face")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
output_raw = gr.Text(label="raw prediction")
|
115 |
|
116 |
fn = lambda pil_image: interface_fn(
|
@@ -121,7 +194,14 @@ with gr.Blocks() as demo:
|
|
121 |
title="ML pipeline for identifying bears from their faces 🐻",
|
122 |
fn=fn,
|
123 |
inputs=input,
|
124 |
-
outputs=[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
examples=image_filepaths,
|
126 |
flagging_mode="never",
|
127 |
)
|
|
|
10 |
from PIL import Image
|
11 |
from ultralytics import YOLO
|
12 |
|
13 |
+
from ui import bearid_ui
|
14 |
+
from utils import load_models, run_pipeline, setup
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
|
17 |
+
def prediction_to_str(bear_ids: list[str], indexed_k_nearest_individuals) -> str:
    """
    Turn the prediction into a human friendly string.

    Args:
        bear_ids: candidate bear ids; the first one is reported as the
            closest individual.
        indexed_k_nearest_individuals: mapping from a bear id to a sequence
            of neighbor records. Only the first record of each id is read,
            and only its ``"distance"`` entry.

    Returns:
        A headline naming ``bear_ids[0]`` followed by one bullet line per
        bear id with its distance formatted with two decimals.

    Raises:
        IndexError: if ``bear_ids`` is empty.
        KeyError: if a bear id is missing from
            ``indexed_k_nearest_individuals``.
    """
    distance_lines = []
    for bear_id in bear_ids:
        # Only the first neighbor record of each individual is reported.
        distance = indexed_k_nearest_individuals[bear_id][0]["distance"]
        distance_lines.append(
            f"- {bear_id} at distance {distance:.2f} in the embedding space"
        )

    distance_str = "\n".join(distance_lines)
    return f"The model found that the closest individual is {bear_ids[0]}:\n\n{distance_str}"
|
38 |
|
39 |
|
40 |
def _load_image(filepath) -> Image.Image:
    """Read an image fully into memory and release the underlying file."""
    with Image.open(filepath) as image:
        # copy() forces the pixel data to be loaded before the file is closed.
        return image.copy()


def interface_fn(
    loaded_models: dict[str, Any], pil_image: Image.Image
) -> Tuple[Image.Image, Image.Image, Image.Image, str, list[Image.Image], str]:
    """
    Main interface function that runs the model on the provided pil_image and
    returns the expected tuple to populate the gradio interface.

    Args:
        loaded_models: models forwarded as-is to ``run_pipeline``.
        pil_image: image to run the pipeline on.

    Returns:
        A tuple holding, in this order:
        - the segmented head image produced by the segmentation stage,
        - the resized cropped head image produced by the crop stage,
        - the identification figure rendered by ``bearid_ui``,
        - the predicted bear id (first entry of the identified bear ids),
        - the sample images indexed for the predicted bear id,
        - the human friendly prediction string.
    """
    PARAM_SQUARE_DIM = 300
    k = 5
    n_samples_per_individuals = 4
    result = run_pipeline(
        loaded_models=loaded_models,
        pil_image=pil_image,
        param_square_dim=PARAM_SQUARE_DIM,
        param_k=k,
        param_n_samples_per_individual=n_samples_per_individuals,
        knn_index_filepath=METRIC_LEARNING_KNN_INDEX_FILEPATH,
    )
    stages = result["stages"]
    pil_image_segmented_head = stages["segmentation"]["output"]["pil_image"]
    pil_image_cropped_head = stages["crop"]["output"]["pil_images"]["resized"]

    identification = stages["identification"]["output"]
    bear_ids = identification["bear_ids"]
    indexed_samples = identification["indexed_samples"]
    indexed_k_nearest_individuals = identification["indexed_k_nearest_individuals"]

    # The first id is the one reported as the prediction. It is kept under its
    # own name so that no later loop can rebind it to another individual.
    # NOTE(review): presumably bear_ids is ordered closest-first - confirm
    # against run_pipeline.
    predicted_bear_id = bear_ids[0]

    # NOTE(review): this fixed path is shared by every call; concurrent
    # requests could overwrite each other's figure. The figure is copied into
    # memory right after rendering to keep that window short.
    output_filepath = Path("./data/07_model_output/predict/prediction.png")
    output_filepath.parent.mkdir(parents=True, exist_ok=True)

    bearid_ui(
        pil_image_chip=pil_image_cropped_head,
        indexed_k_nearest_individuals=indexed_k_nearest_individuals,
        indexed_samples=indexed_samples,
        save_filepath=output_filepath,
        k_closest_neighbors=k,
    )
    pil_image_identification_prediction = _load_image(output_filepath)

    raw_prediction_str = prediction_to_str(
        bear_ids=bear_ids, indexed_k_nearest_individuals=indexed_k_nearest_individuals
    )

    return (
        pil_image_segmented_head,
        pil_image_cropped_head,
        pil_image_identification_prediction,
        predicted_bear_id,
        [_load_image(fp) for fp in indexed_samples[predicted_bear_id]],
        raw_prediction_str,
    )
|
131 |
|
132 |
|
133 |
def examples(dir_examples: Path) -> list[Path]:
|
|
|
177 |
)
|
178 |
output_segmentation_image = gr.Image(type="pil", label="model prediction")
|
179 |
output_cropped_image = gr.Image(type="pil", label="cropped bear face")
|
180 |
+
output_bear_id = gr.Text(label="Most likely BearID")
|
181 |
+
output_bear_id_samples = gr.Gallery(
|
182 |
+
label="Similar faces of the predicted BearID", preview=True
|
183 |
+
)
|
184 |
+
output_identification_prediction_image = gr.Image(
|
185 |
+
type="pil", label="prediction", scale=3
|
186 |
+
)
|
187 |
output_raw = gr.Text(label="raw prediction")
|
188 |
|
189 |
fn = lambda pil_image: interface_fn(
|
|
|
194 |
title="ML pipeline for identifying bears from their faces 🐻",
|
195 |
fn=fn,
|
196 |
inputs=input,
|
197 |
+
outputs=[
|
198 |
+
output_segmentation_image,
|
199 |
+
output_cropped_image,
|
200 |
+
output_identification_prediction_image,
|
201 |
+
output_bear_id,
|
202 |
+
output_bear_id_samples,
|
203 |
+
output_raw,
|
204 |
+
],
|
205 |
examples=image_filepaths,
|
206 |
flagging_mode="never",
|
207 |
)
|
data/images/{c2NhbGUoKQ.jpg → 35639830294_a77085ffde_b.jpg}
RENAMED
File without changes
|
data/images/813-nostril-bear_14999516389_o.jpg
ADDED
![]() |
Git LFS Details
|
ui.py
ADDED
@@ -0,0 +1,186 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
import numpy as np
|
5 |
+
import torchvision
|
6 |
+
from matplotlib import font_manager
|
7 |
+
from matplotlib.figure import Figure
|
8 |
+
from matplotlib.gridspec import GridSpec
|
9 |
+
from PIL import Image
|
10 |
+
|
11 |
+
IMAGENET_MEAN = [0.485, 0.456, 0.406]
|
12 |
+
IMAGENET_STD = [0.229, 0.224, 0.225]
|
13 |
+
DISTANCE_THRESHOLD_NEW_INDIVIDUAL = 0.7
|
14 |
+
|
15 |
+
|
16 |
+
def get_inverse_normalize_transform(mean, std):
    """Build a ``Normalize`` transform configured with ``-mean/std`` and ``1/std``.

    Args:
        mean: per-channel means.
        std: per-channel standard deviations, paired with ``mean`` by position.

    Returns:
        A ``torchvision.transforms.Normalize`` instance. Presumably meant to
        undo a previous ``Normalize(mean, std)``, given the function name -
        verify against the torchvision documentation.
    """
    inverse_mean = [-m / s for m, s in zip(mean, std)]
    inverse_std = [1 / s for s in std]
    return torchvision.transforms.Normalize(mean=inverse_mean, std=inverse_std)
|
20 |
+
|
21 |
+
|
22 |
+
def get_color(
    distance: float,
    distance_threshold_new_individual: float = DISTANCE_THRESHOLD_NEW_INDIVIDUAL,
    margin: float = 0.10,
) -> str:
    """Map a distance to a color name.

    The threshold is widened into a band of ``+/- margin`` (relative):
    distances below the band are ``"green"``, distances inside it are
    ``"orange"`` and everything else is ``"red"``.

    Args:
        distance: distance to classify.
        distance_threshold_new_individual: center of the band.
        margin: relative half-width of the band around the threshold.

    Returns:
        One of ``"green"``, ``"orange"`` or ``"red"``.
    """
    band_low = distance_threshold_new_individual * (1.0 - margin)
    band_high = distance_threshold_new_individual * (1 + margin)

    if distance < band_low:
        return "green"
    if distance < band_high:
        return "orange"
    return "red"
|
35 |
+
|
36 |
+
|
37 |
+
def draw_extrated_chip(ax, chip_image) -> None:
    """Show ``chip_image`` on ``ax`` with hidden axes and the title "Extracted chip"."""
    # Decorate the axes first, then paint the image onto them.
    ax.set_axis_off()
    ax.set_title("Extracted chip")
    ax.imshow(chip_image)
|
41 |
+
|
42 |
+
|
43 |
+
def draw_closest_neighbors(
    fig: Figure,
    gs: GridSpec,
    i_start: int,
    k_closest_neighbors: int,
    indexed_k_nearest_individuals: dict,
) -> None:
    """Draw the closest chips overall, one per column, on grid row ``i_start``.

    All neighbor records of ``indexed_k_nearest_individuals`` (bear id ->
    list of records) are pooled, ordered by ascending ``"distance"``, and the
    first ``k_closest_neighbors`` of them are drawn. Each subplot is titled
    ``"<bear_id>: <distance>"`` in the color returned by ``get_color``.

    Args:
        fig: figure receiving the subplots.
        gs: grid used to place the subplots.
        i_start: grid row on which the chips are drawn.
        k_closest_neighbors: maximum number of chips to draw.
        indexed_k_nearest_individuals: bear id -> list of records, each
            providing at least ``"distance"`` and ``"dataset_image"``.
    """
    # Pool every record, tagging a copy of it with the individual it belongs to.
    candidates = []
    for bear_id, records in indexed_k_nearest_individuals.items():
        candidates.extend({**record, "bear_id": bear_id} for record in records)
    candidates.sort(key=lambda candidate: candidate["distance"])

    denormalize = get_inverse_normalize_transform(
        mean=IMAGENET_MEAN,
        std=IMAGENET_STD,
    )

    for column, candidate in enumerate(candidates[:k_closest_neighbors]):
        ax = fig.add_subplot(gs[i_start, column])
        distance = candidate["distance"]
        # Move axis 0 last before display.
        # presumably dataset_image is a normalized channels-first tensor - TODO confirm
        chip = np.transpose(
            denormalize(candidate["dataset_image"]).numpy(), (1, 2, 0)
        )
        title_color = get_color(distance=distance)
        ax.set_axis_off()
        ax.set_title(
            label=f"{candidate['bear_id']}: {distance:.2f}", color=title_color
        )
        ax.imshow(chip)
|
77 |
+
|
78 |
+
|
79 |
+
def draw_top_k_individuals(
    fig: Figure,
    gs: GridSpec,
    i_start: int,
    i_end: int,
    indexed_k_nearest_individuals: dict,
    bear_ids: list[str],
    indexed_samples: dict,
) -> None:
    """Draw one column per individual on grid rows ``i_start`` to ``i_end - 1``.

    Row ``i_start`` shows, for each bear id, the first record of
    ``indexed_k_nearest_individuals[bear_id]`` titled with the id and its
    distance (colored by ``get_color``). Each following row shows the next
    file of ``indexed_samples[bear_id]``; cells are left empty when an
    individual has no sample left or the filepath is falsy.

    Args:
        fig: figure receiving the subplots.
        gs: grid used to place the subplots.
        i_start: first grid row to draw on (closest chip of each individual).
        i_end: grid row at which drawing stops (exclusive).
        indexed_k_nearest_individuals: bear id -> list of records, each
            providing at least ``"distance"`` and ``"dataset_image"``.
        bear_ids: individuals to draw, one per column, in column order.
        indexed_samples: bear id -> list of image filepaths.
    """
    inv_normalize = get_inverse_normalize_transform(
        mean=IMAGENET_MEAN,
        std=IMAGENET_STD,
    )
    if i_start >= i_end:
        # No row available: nothing to draw.
        return

    # Draw the closest individual chips
    for j, bear_id in enumerate(bear_ids):
        ax = fig.add_subplot(gs[i_start, j])
        nearest_individual = indexed_k_nearest_individuals[bear_id][0]
        distance = nearest_individual["distance"]
        image = inv_normalize(nearest_individual["dataset_image"]).numpy()
        # Move axis 0 last before display.
        # presumably dataset_image is a normalized channels-first tensor - TODO confirm
        image = np.transpose(image, (1, 2, 0))
        color = get_color(distance=distance)
        ax.set_axis_off()
        ax.set_title(label=f"{bear_id}: {distance:.2f}", color=color)
        ax.imshow(image)

    # Draw sample chips from the same individuals, one sample index per row
    for sample_idx, i in enumerate(range(i_start + 1, i_end)):
        for j, bear_id in enumerate(bear_ids):
            samples = indexed_samples[bear_id]
            if sample_idx >= len(samples):
                continue
            filepath = samples[sample_idx]
            if not filepath:
                continue
            ax = fig.add_subplot(gs[i, j])
            with Image.open(filepath) as image:
                ax.set_axis_off()
                ax.imshow(image)
|
119 |
+
|
120 |
+
|
121 |
+
def bearid_ui(
    pil_image_chip: Image.Image,
    indexed_k_nearest_individuals: dict,
    indexed_samples: dict,
    save_filepath: Path,
    k_closest_neighbors: int = 5,
) -> None:
    """Main UI for identifying bears.

    Renders a grid figure and saves it to ``save_filepath``. Grid rows, from
    top to bottom:

    - row 0: left empty,
    - row 1: "Closest faces" section title,
    - row 2: the ``k_closest_neighbors`` closest chips overall,
    - row 3: "Closest N individuals" section title,
    - row 4: the closest chip of each individual,
    - rows 5 and below: one row per sample image of ``indexed_samples``.

    Args:
        pil_image_chip: extracted chip of the query image. Accepted for
            interface compatibility; it is not drawn by this function.
        indexed_k_nearest_individuals: bear id -> list of neighbor records.
        indexed_samples: bear id -> list of sample image filepaths.
        save_filepath: where the rendered figure is written.
        k_closest_neighbors: number of chips drawn in the "Closest faces" row.
    """
    # Assumption: the bear_ids are sorted by distance - if that's not something
    # we can rely on, we should just sort
    bear_ids = list(indexed_k_nearest_individuals)

    # Max of the number of closest_neighbors and the number of bearids
    ncols = max(len(bear_ids), k_closest_neighbors)

    # Row indices of the two image sections; each title sits on the row above.
    i_closest_neighbors = 2
    i_top_k_individual = 4

    # Rows 0..i_top_k_individual are fixed, followed by one row per sample of
    # the individual with the most samples. Deriving the count from the layout
    # keeps every sample visible and keeps row i_top_k_individual inside the
    # grid even when there are few (or no) samples.
    max_samples = max((len(xs) for xs in indexed_samples.values()), default=0)
    nrows = i_top_k_individual + 1 + max_samples

    figsize = (3 * ncols, 3 * nrows)
    fig = plt.figure(constrained_layout=True, figsize=figsize)
    try:
        gs = GridSpec(nrows=nrows, ncols=ncols, figure=fig)
        font_properties_section = font_manager.FontProperties(size=35)

        # Draw closest neighbors
        ax = fig.add_subplot(gs[i_closest_neighbors - 1, :])
        ax.set_axis_off()
        ax.text(
            y=0.2,
            x=0,
            s="Closest faces",
            font_properties=font_properties_section,
        )
        draw_closest_neighbors(
            fig=fig,
            gs=gs,
            i_start=i_closest_neighbors,
            k_closest_neighbors=k_closest_neighbors,
            indexed_k_nearest_individuals=indexed_k_nearest_individuals,
        )

        # Filling out the grid with top k individuals and their samples
        ax = fig.add_subplot(gs[i_top_k_individual - 1, :])
        ax.set_axis_off()
        ax.text(
            y=0.2,
            x=0,
            s=f"Closest {len(bear_ids)} individuals",
            font_properties=font_properties_section,
        )
        draw_top_k_individuals(
            fig=fig,
            gs=gs,
            i_end=nrows,
            i_start=i_top_k_individual,
            indexed_k_nearest_individuals=indexed_k_nearest_individuals,
            bear_ids=bear_ids,
            indexed_samples=indexed_samples,
        )

        fig.savefig(save_filepath, bbox_inches="tight")
    finally:
        # Always release the figure, even when drawing or saving fails.
        plt.close(fig)
|