achouffe committed on
Commit 8eea76c · verified · 1 Parent(s): bf6c3c2

feat: basic face recognition UI

app.py CHANGED
@@ -10,29 +10,36 @@ import numpy as np
 from PIL import Image
 from ultralytics import YOLO
 
-from utils import (
-    bgr_to_rgb,
-    crop_from_yolov8,
-    get_best_device,
-    load_models,
-    resize,
-    run_pipeline,
-    setup,
-    square_pad,
-)
+from ui import bearid_ui
+from utils import load_models, run_pipeline, setup
 
 
-def prediction_to_str(yolo_prediction) -> str:
+def prediction_to_str(bear_ids: list[str], indexed_k_nearest_individuals) -> str:
     """
-    Turn the yolo_prediction into a human friendly string.
+    Turn the prediction into a human friendly string.
     """
-    boxes = yolo_prediction.boxes
-    return f"""{len(boxes.conf)} bear detected! Trigger the bear repellent 🐻"""
+    nearest_individuals = []
+
+    for j in range(len(bear_ids)):
+        bear_id = bear_ids[j]
+        nearest_individual = indexed_k_nearest_individuals[bear_id][0]
+        distance = nearest_individual["distance"]
+        nearest_individuals.append(
+            {"bear_id": bear_id, "distance": distance, "data": nearest_individual}
+        )
+
+    distance_str = "\n".join(
+        [
+            f"- {n['bear_id']} at distance {n['distance']:.2f} in the embedding space"
+            for n in nearest_individuals
+        ]
+    )
+    return f"The model found that the closest individual is {bear_ids[0]}:\n\n{distance_str}"
 
 
 def interface_fn(
     loaded_models: dict[str, Any], pil_image: Image.Image
-) -> Tuple[Image.Image, Image.Image, str]:
+) -> Tuple[Image.Image, Image.Image, Image.Image, str, list[Image.Image], str]:
     """
     Main interface function that runs the model on the provided pil_image and
     returns the expected tuple to populate the gradio interface.
@@ -47,21 +54,80 @@ def interface_fn(
     model.
     """
     PARAM_SQUARE_DIM = 300
+    k = 5
+    n_samples_per_individuals = 4
     result = run_pipeline(
         loaded_models=loaded_models,
         pil_image=pil_image,
         param_square_dim=PARAM_SQUARE_DIM,
-        param_k=5,
-        param_n_samples_per_individual=4,
+        param_k=k,
+        param_n_samples_per_individual=n_samples_per_individuals,
         knn_index_filepath=METRIC_LEARNING_KNN_INDEX_FILEPATH,
     )
     pil_image_segmented_head = result["stages"]["segmentation"]["output"]["pil_image"]
     pil_image_cropped_head = result["stages"]["crop"]["output"]["pil_images"]["resized"]
 
-    # raw_prediction_str = prediction_to_str(yolov8_segmentation_prediction)
+    bear_ids = result["stages"]["identification"]["output"]["bear_ids"]
+    k_nearest_individuals = result["stages"]["identification"]["output"][
+        "k_nearest_individuals"
+    ]
+    indexed_samples = result["stages"]["identification"]["output"]["indexed_samples"]
+    indexed_k_nearest_individuals = result["stages"]["identification"]["output"][
+        "indexed_k_nearest_individuals"
+    ]
+    tmp = k_nearest_individuals
+    tmp = indexed_samples
+    bear_id = bear_ids[0]
+    other_bear_ids = bear_ids[1:]
+
+    images = []
+    for obid in other_bear_ids:
+        for fp in indexed_samples[obid]:
+            images.append(Image.open(fp))
+
+    output_filepath = Path("./data/07_model_output/predict/prediction.png")
+    output_filepath.parent.mkdir(parents=True, exist_ok=True)
+
+    bearid_ui(
+        pil_image_chip=pil_image_cropped_head,
+        indexed_k_nearest_individuals=indexed_k_nearest_individuals,
+        indexed_samples=indexed_samples,
+        save_filepath=output_filepath,
+        k_closest_neighbors=k,
+    )
+
+    pil_image_identification_prediction = Image.open(output_filepath)
 
-    return (pil_image_segmented_head, pil_image_cropped_head, str(result))
-    return (pil_image_segmented_head, raw_prediction_str)
+    nearest_individuals = []
+
+    for j in range(len(bear_ids)):
+        bear_id = bear_ids[j]
+        nearest_individual = indexed_k_nearest_individuals[bear_id][0]
+        distance = nearest_individual["distance"]
+        nearest_individuals.append(
+            {"bear_id": bear_id, "distance": distance, "data": nearest_individual}
+        )
+
+    distance_str = "\n".join(
+        [
+            f"- {n['bear_id']} at distance {n['distance']:.2f} in the embedding space"
+            for n in nearest_individuals
+        ]
+    )
+    tmp = f"The model found that the closest individual is {bear_id}:\n\n{distance_str}"
+
+    raw_prediction_str = prediction_to_str(
+        bear_ids=bear_ids, indexed_k_nearest_individuals=indexed_k_nearest_individuals
+    )
+
+    return (
+        pil_image_segmented_head,
+        pil_image_cropped_head,
+        pil_image_identification_prediction,
+        bear_id,
+        [Image.open(fp) for fp in indexed_samples[bear_id]],
+        raw_prediction_str,
+    )
 
 
 def examples(dir_examples: Path) -> list[Path]:
@@ -111,6 +177,13 @@ with gr.Blocks() as demo:
     )
     output_segmentation_image = gr.Image(type="pil", label="model prediction")
     output_cropped_image = gr.Image(type="pil", label="cropped bear face")
+    output_bear_id = gr.Text(label="Most likely BearID")
+    output_bear_id_samples = gr.Gallery(
+        label="Similar faces of the predicted BearID", preview=True
+    )
+    output_identification_prediction_image = gr.Image(
+        type="pil", label="prediction", scale=3
+    )
     output_raw = gr.Text(label="raw prediction")
 
     fn = lambda pil_image: interface_fn(
@@ -121,7 +194,14 @@ with gr.Blocks() as demo:
         title="ML pipeline for identifying bears from their faces 🐻",
         fn=fn,
         inputs=input,
-        outputs=[output_segmentation_image, output_cropped_image, output_raw],
+        outputs=[
+            output_segmentation_image,
+            output_cropped_image,
+            output_identification_prediction_image,
+            output_bear_id,
+            output_bear_id_samples,
+            output_raw,
+        ],
         examples=image_filepaths,
         flagging_mode="never",
     )
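
The hunks above wire a kNN-based identification stage into the Gradio demo: the new prediction_to_str summarizes the nearest individuals per BearID. A minimal sketch of the data shape it expects and the text it produces for the "raw prediction" box, using synthetic data (the bear IDs and distances below are made up for illustration):

# Synthetic stand-in for the identification stage output: each BearID maps to
# its nearest matches, each carrying at least a "distance" key, with bear_ids
# assumed sorted by ascending distance.
indexed_k_nearest_individuals = {
    "bf_480": [{"distance": 0.31}, {"distance": 0.44}],
    "bf_813": [{"distance": 0.68}],
}
bear_ids = list(indexed_k_nearest_individuals.keys())

# Mirror the formatting logic of prediction_to_str shown in the diff.
lines = []
for bear_id in bear_ids:
    distance = indexed_k_nearest_individuals[bear_id][0]["distance"]
    lines.append(f"- {bear_id} at distance {distance:.2f} in the embedding space")

print(f"The model found that the closest individual is {bear_ids[0]}:\n")
print("\n".join(lines))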
data/images/{c2NhbGUoKQ.jpg → 35639830294_a77085ffde_b.jpg} RENAMED
File without changes
data/images/813-nostril-bear_14999516389_o.jpg ADDED

Git LFS Details

  • SHA256: 2f966960f2660ea16892d0c1eb9aa5f8ca74e016bfdbbc94b71349de4c154d9b
  • Pointer size: 131 Bytes
  • Size of remote file: 422 kB
ui.py ADDED
@@ -0,0 +1,186 @@
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+import torchvision
+from matplotlib import font_manager
+from matplotlib.figure import Figure
+from matplotlib.gridspec import GridSpec
+from PIL import Image
+
+IMAGENET_MEAN = [0.485, 0.456, 0.406]
+IMAGENET_STD = [0.229, 0.224, 0.225]
+DISTANCE_THRESHOLD_NEW_INDIVIDUAL = 0.7
+
+
+def get_inverse_normalize_transform(mean, std):
+    return torchvision.transforms.Normalize(
+        mean=[-m / s for m, s in zip(mean, std)], std=[1 / s for s in std]
+    )
+
+
+def get_color(
+    distance: float,
+    distance_threshold_new_individual: float = DISTANCE_THRESHOLD_NEW_INDIVIDUAL,
+    margin: float = 0.10,
+) -> str:
+    threshold_unsure = distance_threshold_new_individual * (1.0 - margin)
+    threshold_new_individual = distance_threshold_new_individual * (1 + margin)
+    if distance < threshold_unsure:
+        return "green"
+    elif distance < threshold_new_individual:
+        return "orange"
+    else:
+        return "red"
+
+
+def draw_extrated_chip(ax, chip_image) -> None:
+    ax.set_title("Extracted chip")
+    ax.set_axis_off()
+    ax.imshow(chip_image)
+
+
+def draw_closest_neighbors(
+    fig: Figure,
+    gs: GridSpec,
+    i_start: int,
+    k_closest_neighbors: int,
+    indexed_k_nearest_individuals: dict,
+) -> None:
+    inv_normalize = get_inverse_normalize_transform(
+        mean=IMAGENET_MEAN,
+        std=IMAGENET_STD,
+    )
+
+    neighbors = []
+    for bear_id, xs in indexed_k_nearest_individuals.items():
+        for x in xs:
+            data = x.copy()
+            data["bear_id"] = bear_id
+            neighbors.append(data)
+
+    nearest_neighbors = sorted(
+        neighbors,
+        key=lambda x: x["distance"],
+    )[:k_closest_neighbors]
+    for j, neighbor in enumerate(nearest_neighbors):
+        ax = fig.add_subplot(gs[i_start, j])
+        distance = neighbor["distance"]
+        bear_id = neighbor["bear_id"]
+        dataset_image = neighbor["dataset_image"]
+        image = inv_normalize(dataset_image).numpy()
+        image = np.transpose(image, (1, 2, 0))
+        color = get_color(distance=distance)
+        ax.set_axis_off()
+        ax.set_title(label=f"{bear_id}: {distance:.2f}", color=color)
+        ax.imshow(image)
+
+
+def draw_top_k_individuals(
+    fig: Figure,
+    gs: GridSpec,
+    i_start: int,
+    i_end: int,
+    indexed_k_nearest_individuals: dict,
+    bear_ids: list[str],
+    indexed_samples: dict,
+):
+    inv_normalize = get_inverse_normalize_transform(
+        mean=IMAGENET_MEAN,
+        std=IMAGENET_STD,
+    )
+    for i in range(i_start, i_end):
+        for j in range(len(bear_ids)):
+            # Draw the closest individual chips
+            if i == i_start:
+                ax = fig.add_subplot(gs[i, j])
+                bear_id = bear_ids[j]
+                nearest_individual = indexed_k_nearest_individuals[bear_id][0]
+                distance = nearest_individual["distance"]
+                dataset_image = nearest_individual["dataset_image"]
+                image = inv_normalize(dataset_image).numpy()
+                image = np.transpose(image, (1, 2, 0))
+                color = get_color(distance=distance)
+                ax.set_axis_off()
+                ax.set_title(label=f"{bear_id}: {distance:.2f}", color=color)
+                ax.imshow(image)
+
+            # Draw random chips from the same individuals
+            else:
+                bear_id = bear_ids[j]
+                idx = i - i_start - 1
+                if idx < len(indexed_samples[bear_id]):
+                    filepath = indexed_samples[bear_id][idx]
+                    if filepath:
+                        ax = fig.add_subplot(gs[i, j])
+                        with Image.open(filepath) as image:
+                            ax.set_axis_off()
+                            ax.imshow(image)
+
+
+def bearid_ui(
+    pil_image_chip: Image.Image,
+    indexed_k_nearest_individuals: dict,
+    indexed_samples: dict,
+    save_filepath: Path,
+    k_closest_neighbors: int = 5,
+) -> None:
+    """Main UI for identifying bears."""
+    chip_image = pil_image_chip
+    # Assumption: the bear_ids are sorted by distance - if that's not something
+    # we can rely on, we should just sort
+    bear_ids = list(indexed_k_nearest_individuals.keys())
+
+    # Max of the number of closest_neighbors and the number of bear_ids
+    ncols = max(len(bear_ids), k_closest_neighbors)
+
+    # 1 row for the closest neighbors title section
+    # 1 row for the closest neighbors
+    # 1 row for the individuals title section
+    # rows for the indexed_samples (random images of a given individual)
+    nrows = max([len(xs) for xs in indexed_samples.values()]) + 3
+    figsize = (3 * ncols, 3 * nrows)
+    fig = plt.figure(constrained_layout=True, figsize=figsize)
+    gs = GridSpec(nrows=nrows, ncols=ncols, figure=fig)
+    font_properties_section = font_manager.FontProperties(size=35)
+    font_properties_title = font_manager.FontProperties(size=40)
+
+    # Draw closest neighbors
+    i_closest_neighbors = 2
+    ax = fig.add_subplot(gs[i_closest_neighbors - 1, :])
+    ax.set_axis_off()
+    ax.text(
+        y=0.2,
+        x=0,
+        s="Closest faces",
+        font_properties=font_properties_section,
+    )
+    draw_closest_neighbors(
+        fig=fig,
+        gs=gs,
+        i_start=i_closest_neighbors,
+        k_closest_neighbors=k_closest_neighbors,
+        indexed_k_nearest_individuals=indexed_k_nearest_individuals,
+    )
+    # Filling out the grid with top k individuals and random samples
+    i_top_k_individual = 4
+    ax = fig.add_subplot(gs[i_top_k_individual - 1, :])
+    ax.set_axis_off()
+    ax.text(
+        y=0.2,
+        x=0,
+        s=f"Closest {len(bear_ids)} individuals",
+        font_properties=font_properties_section,
+    )
+    draw_top_k_individuals(
+        fig=fig,
+        gs=gs,
+        i_end=nrows,
+        i_start=i_top_k_individual,
+        indexed_k_nearest_individuals=indexed_k_nearest_individuals,
+        bear_ids=bear_ids,
+        indexed_samples=indexed_samples,
+    )
+
+    plt.savefig(save_filepath, bbox_inches="tight")
+    plt.close()
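
For context, a hedged usage sketch of the new bearid_ui with synthetic inputs; the bear IDs, distances, tensors and file paths below are made up, and it assumes ui.py is importable and that each "dataset_image" is an ImageNet-normalized CHW tensor, which draw_closest_neighbors un-normalizes before plotting.

from pathlib import Path

import torch
from PIL import Image

from ui import bearid_ui

# Stand-in inputs: a gray 300x300 "face chip" and a random tensor playing the
# role of a normalized dataset image.
chip = Image.new("RGB", (300, 300), color="gray")
fake_tensor = torch.rand(3, 224, 224)

# Reuse one saved chip as the "random samples" for each individual.
sample_fp = Path("sample_chip.png")
chip.save(sample_fp)

# Hypothetical BearIDs, keyed in ascending-distance order as bearid_ui assumes.
indexed_k_nearest_individuals = {
    "bf_480": [{"distance": 0.35, "dataset_image": fake_tensor}],
    "bf_813": [{"distance": 0.80, "dataset_image": fake_tensor}],
}
indexed_samples = {
    "bf_480": [sample_fp, sample_fp, sample_fp],
    "bf_813": [sample_fp, sample_fp, sample_fp],
}

# Renders the grid (closest faces row, closest individuals row, sample rows)
# and writes it to disk, as app.py does before reloading it for Gradio.
bearid_ui(
    pil_image_chip=chip,
    indexed_k_nearest_individuals=indexed_k_nearest_individuals,
    indexed_samples=indexed_samples,
    save_filepath=Path("bearid_prediction.png"),
    k_closest_neighbors=2,
)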