Files changed (6)
  1. .gitattributes +0 -1
  2. README.md +6 -24
  3. app.py +54 -209
  4. metadata.csv +0 -0
  5. metadata.py +0 -23
  6. requirements.txt +1 -2
.gitattributes CHANGED
@@ -34,4 +34,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 *.jpg filter=lfs diff=lfs merge=lfs -text
-spaces::accelerator gpu
 
README.md CHANGED
@@ -1,30 +1,12 @@
 ---
-title: Facial Recognition App
-emoji: 🔍
-colorFrom: blue
-colorTo: purple
+title: Face Recognition
+emoji:
+colorFrom: red
+colorTo: blue
 sdk: gradio
-sdk_version: 5.29.0
+sdk_version: 5.23.0
 app_file: app.py
 pinned: false
 ---
 
-# Facial Recognition App
-
-This application uses DeepFace and Facenet for facial recognition and similarity matching.
-
-## Hardware Requirements
-- GPU: Required
-- CPU: 4+ cores recommended
-- RAM: 8GB+ recommended
-
-## Environment Setup
-The application requires the following key dependencies:
-- deepface
-- gradio
-- huggingface_hub
-- datasets
-- Pillow
-- numpy
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
app.py CHANGED
@@ -1,176 +1,42 @@
-import os
 import numpy as np
 from PIL import Image
 import gradio as gr
 from deepface import DeepFace
-from datasets import load_dataset
-import pickle
-from io import BytesIO
-from huggingface_hub import upload_file, hf_hub_download, list_repo_files
-from pathlib import Path
-import gc
-import requests
-import time
-import shutil
-import tarfile
-import tensorflow as tf
-
-# 🔁 Clean up temporary storage if it exists
-def clean_temp_dirs():
-    print("🧹 Cleaning temporary folders...")
-    for folder in ["embeddings", "batches"]:
-        path = Path(folder)
-        if path.exists() and path.is_dir():
-            shutil.rmtree(path)
-            print(f"✅ Folder removed: {folder}")
-        path.mkdir(exist_ok=True)
-
-clean_temp_dirs()
-
-# 📁 Parameters
-DATASET_ID = "Segizu/facial-recognition"
-EMBEDDINGS_SUBFOLDER = "embeddings"
-LOCAL_EMB_DIR = Path("embeddings")
-LOCAL_EMB_DIR.mkdir(exist_ok=True)
-HF_TOKEN = os.getenv("HF_TOKEN")
-headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
-
-# 💾 Settings
-MAX_TEMP_STORAGE_GB = 40
-UPLOAD_EVERY = 50
+from datasets import load_dataset, DownloadConfig
+import os
+os.system("rm -rf ~/.cache/huggingface/hub/datasets--Segizu--dataset_faces")
 
-def get_folder_size(path):
-    total = 0
-    for dirpath, _, filenames in os.walk(path):
-        for f in filenames:
-            fp = os.path.join(dirpath, f)
-            total += os.path.getsize(fp)
-    return total / (1024 ** 3)
+# ✅ Load the Hugging Face dataset, forcing a clean download
+download_config = DownloadConfig(force_download=True)
+dataset = load_dataset("Segizu/dataset_faces", download_config=download_config)
+if "train" in dataset:
+    dataset = dataset["train"]
 
-def preprocess_image(img: Image.Image) -> np.ndarray:
+# 🔄 Preprocess an image for Facenet
+def preprocess_image(img):
     img_rgb = img.convert("RGB")
     img_resized = img_rgb.resize((160, 160), Image.Resampling.LANCZOS)
     return np.array(img_resized)
 
-# Load the CSV from the dataset
-dataset = load_dataset(
-    "csv",
-    data_files="metadata.csv",
-    split="train",
-    column_names=["image"],
-    header=0
-)
-@GPU
+# 📦 Build the embeddings database
 def build_database():
-    print(f"📊 Temporary storage usage at START: {get_folder_size('.'):.2f} GB")
-    print("🔄 Generating embeddings...")
-    batch_size = 10
-    archive_batch_size = 50
-    batch_files = []
-    batch_index = 0
-    ARCHIVE_DIR = Path("batches")
-    ARCHIVE_DIR.mkdir(exist_ok=True)
-
-    for i in range(0, len(dataset), batch_size):
-        batch = dataset[i:i + batch_size]
-        print(f"📦 Batch {i // batch_size + 1}/{(len(dataset) + batch_size - 1) // batch_size}")
-
-        for j in range(len(batch["image"])):
-            image_url = batch["image"][j]
-
-            if not isinstance(image_url, str) or not image_url.startswith("http") or image_url.strip().lower() == "image":
-                print(f"⚠️ Skipping {i + j} - invalid URL: {image_url}")
-                continue
-
-            name = f"image_{i + j}"
-            filename = LOCAL_EMB_DIR / f"{name}.pkl"
-
-            # Check whether it was already uploaded
-            try:
-                hf_hub_download(
-                    repo_id=DATASET_ID,
-                    repo_type="dataset",
-                    filename=f"{EMBEDDINGS_SUBFOLDER}/batch_{batch_index:03}.tar.gz",
-                    token=HF_TOKEN
-                )
-                print(f"⏩ Already exists remotely: {name}.pkl")
-                continue
-            except:
-                pass
-
-            try:
-                response = requests.get(image_url, headers=headers, timeout=10)
-                response.raise_for_status()
-                img = Image.open(BytesIO(response.content)).convert("RGB")
-
-                img_processed = preprocess_image(img)
-                embedding = DeepFace.represent(
-                    img_path=img_processed,
-                    model_name="Facenet",
-                    enforce_detection=False
-                )[0]["embedding"]
-
-                with open(filename, "wb") as f:
-                    pickle.dump({"name": name, "img": img, "embedding": embedding}, f)
-
-                batch_files.append(filename)
-                del img_processed
-                gc.collect()
-
-                if len(batch_files) >= archive_batch_size or get_folder_size(".") > MAX_TEMP_STORAGE_GB:
-                    archive_path = ARCHIVE_DIR / f"batch_{batch_index:03}.tar.gz"
-                    with tarfile.open(archive_path, "w:gz") as tar:
-                        for file in batch_files:
-                            tar.add(file, arcname=file.name)
-
-                    print(f"📦 Packed: {archive_path}")
-
-                    upload_file(
-                        path_or_fileobj=str(archive_path),
-                        path_in_repo=f"{EMBEDDINGS_SUBFOLDER}/{archive_path.name}",
-                        repo_id=DATASET_ID,
-                        repo_type="dataset",
-                        token=HF_TOKEN
-                    )
-                    print(f"✅ Uploaded: {archive_path.name}")
-
-                    for f in batch_files:
-                        f.unlink()
-                    archive_path.unlink()
-                    print("🧹 Cleanup completed after upload")
-
-                    batch_files = []
-                    batch_index += 1
-                    time.sleep(2)
-                    print(f"📊 Final storage usage: {get_folder_size('.'):.2f} GB")
-
-            except Exception as e:
-                print(f"❌ Error on {name}: {e}")
-                continue
-
-    if batch_files:
-        archive_path = ARCHIVE_DIR / f"batch_{batch_index:03}.tar.gz"
-        with tarfile.open(archive_path, "w:gz") as tar:
-            for file in batch_files:
-                tar.add(file, arcname=file.name)
-
-        print(f"📦 Final pack: {archive_path}")
-
-        upload_file(
-            path_or_fileobj=str(archive_path),
-            path_in_repo=f"{EMBEDDINGS_SUBFOLDER}/{archive_path.name}",
-            repo_id=DATASET_ID,
-            repo_type="dataset",
-            token=HF_TOKEN
-        )
-
-        for f in batch_files:
-            f.unlink()
-        archive_path.unlink()
-        print("✅ Final upload and cleanup")
+    database = []
+    for i, item in enumerate(dataset):
+        try:
+            img = item["image"]
+            img_processed = preprocess_image(img)
+            embedding = DeepFace.represent(
+                img_path=img_processed,
+                model_name="Facenet",
+                enforce_detection=False
+            )[0]["embedding"]
+            database.append((f"image_{i}", img, embedding))
+        except Exception as e:
+            print(f"❌ Could not process image {i}: {e}")
+    return database
 
-# 🔍 Search for similarities
-def find_similar_faces(uploaded_image: Image.Image):
+# 🔍 Find similar faces
+def find_similar_faces(uploaded_image):
     try:
         img_processed = preprocess_image(uploaded_image)
         query_embedding = DeepFace.represent(
@@ -178,61 +44,40 @@ def find_similar_faces(uploaded_image: Image.Image):
             model_name="Facenet",
             enforce_detection=False
         )[0]["embedding"]
-        del img_processed
-        gc.collect()
-    except Exception as e:
-        return [], f"⚠ Error processing image: {str(e)}"
+    except:
+        return [], "⚠ No valid face was detected in the image."
 
     similarities = []
-
-    try:
-        embedding_files = [
-            f for f in list_repo_files(DATASET_ID, repo_type="dataset", token=HF_TOKEN)
-            if f.startswith(f"{EMBEDDINGS_SUBFOLDER}/") and f.endswith(".pkl")
-        ]
-    except Exception as e:
-        return [], f"⚠ Error retrieving files: {str(e)}"
-
-    for file_path in embedding_files:
-        try:
-            file_bytes = requests.get(
-                f"https://huggingface.co/datasets/{DATASET_ID}/resolve/main/{file_path}",
-                headers=headers,
-                timeout=10
-            ).content
-            record = pickle.loads(file_bytes)
-
-            name = record["name"]
-            img = record["img"]
-            emb = record["embedding"]
-
-            dist = np.linalg.norm(np.array(query_embedding) - np.array(emb))
-            sim_score = 1 / (1 + dist)
-            similarities.append((sim_score, name, np.array(img)))
-
-        except Exception as e:
-            print(f"⚠ Error with {file_path}: {e}")
-            continue
-
-    similarities.sort(reverse=True)
-    top = similarities[:5]
-    gallery = [(img, f"{name} - Similarity: {sim:.2f}") for sim, name, img in top]
-    summary = "\n".join([f"{name} - Similarity: {sim:.2f}" for sim, name, _ in top])
-    return gallery, summary
+    for name, db_img, embedding in database:
+        dist = np.linalg.norm(np.array(query_embedding) - np.array(embedding))
+        sim_score = 1 / (1 + dist)
+        similarities.append((sim_score, name, db_img))
+
+    similarities.sort(reverse=True)
+    top_matches = similarities[:]
+
+    gallery_items = []
+    text_summary = ""
+    for sim, name, img in top_matches:
+        caption = f"{name} - Similarity: {sim:.2f}"
+        gallery_items.append((img, caption))
+        text_summary += caption + "\n"
+
+    return gallery_items, text_summary
+
+# ⚙️ Initialize the database
+database = build_database()
 
 # 🎛️ Gradio interface
-with gr.Blocks() as demo:
-    gr.Markdown("## 🔍 Facial recognition with DeepFace + ZeroGPU")
-    with gr.Row():
-        image_input = gr.Image(label="📤 Upload an image", type="pil")
-        find_btn = gr.Button("🔎 Find similar faces")
-    gallery = gr.Gallery(label="📸 Similar faces")
-    summary = gr.Textbox(label="🧠 Details", lines=6)
-    find_btn.click(fn=find_similar_faces, inputs=image_input, outputs=[gallery, summary])
-
-    with gr.Row():
-        build_btn = gr.Button("⚙️ Build embeddings database (uses GPU)")
-        build_btn.click(fn=build_database, inputs=[], outputs=[])
+demo = gr.Interface(
+    fn=find_similar_faces,
+    inputs=gr.Image(label="📤 Upload an image", type="pil"),
+    outputs=[
+        gr.Gallery(label="📸 Most similar faces"),
+        gr.Textbox(label="🧠 Similarity", lines=6)
+    ],
+    title="🔍 Face Finder with DeepFace",
+    description="Upload an image and it will be compared against the faces in the Hugging Face dataset (`Segizu/dataset_faces`)."
+)
 
 demo.launch()
-
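Both the old and the new `find_similar_faces` rank matches the same way: the L2 distance between Facenet embeddings is mapped to a similarity score via `sim_score = 1 / (1 + dist)`, which equals 1.0 for identical embeddings and decays toward 0 as the distance grows. A minimal, self-contained sketch of that ranking step — the random 128-dimensional vectors stand in for real Facenet output, and `rank_by_similarity` is a hypothetical helper, not part of app.py:

import numpy as np

def rank_by_similarity(query_emb, database):
    # database: list of (name, embedding) pairs; returns (score, name) best-first
    scored = []
    for name, emb in database:
        dist = np.linalg.norm(np.asarray(query_emb) - np.asarray(emb))
        scored.append((1.0 / (1.0 + dist), name))
    scored.sort(reverse=True)  # highest similarity first
    return scored

rng = np.random.default_rng(0)
query = rng.normal(size=128)
db = [("image_0", query + 0.01), ("image_1", rng.normal(size=128))]
print(rank_by_similarity(query, db))  # image_0 should rank first

Note that the new version returns every match (`top_matches = similarities[:]`), whereas the old version truncated to the five best (`similarities[:5]`).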
 
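The new app.py clears the dataset cache by shelling out (`os.system("rm -rf ~/.cache/huggingface/hub/datasets--Segizu--dataset_faces")`) and then passes `DownloadConfig(force_download=True)` to `load_dataset`. If the goal is simply a clean re-download, a sketch of the same effect using only the `datasets` API, assuming its `download_mode` parameter is acceptable here:

from datasets import load_dataset

# Ignore any locally cached copy and fetch the dataset again,
# replacing both the rm -rf call and the DownloadConfig.
dataset = load_dataset("Segizu/dataset_faces", download_mode="force_redownload")
if "train" in dataset:
    dataset = dataset["train"]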
metadata.csv DELETED
The diff for this file is too large to render. See raw diff
 
metadata.py DELETED
@@ -1,23 +0,0 @@
-from huggingface_hub import HfApi
-import csv
-import os
-
-HF_TOKEN = os.getenv("HF_TOKEN") or ""
-repo_id = "Segizu/facial-recognition"
-
-api = HfApi()
-files = api.list_repo_files(repo_id=repo_id, repo_type="dataset", token=HF_TOKEN)
-
-# Build full URLs
-base_url = f"https://huggingface.co/datasets/{repo_id}/resolve/main/"
-image_urls = [base_url + f for f in files if f.lower().endswith(".jpg")]
-
-# Write a new metadata.csv
-with open("metadata.csv", "w", newline="") as f:
-    writer = csv.writer(f)
-    writer.writerow(["image"])
-    for url in image_urls:
-        writer.writerow([url])
-
-print(f"✅ metadata.csv regenerated with absolute URLs ({len(image_urls)} images)")
-
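For reference, the metadata.csv this script regenerated (also deleted in this commit) was a single-column CSV of absolute URLs, which the old app.py consumed via `load_dataset("csv", data_files="metadata.csv", ...)`. Its expected shape, with hypothetical image filenames:

image
https://huggingface.co/datasets/Segizu/facial-recognition/resolve/main/face_0001.jpg
https://huggingface.co/datasets/Segizu/facial-recognition/resolve/main/face_0002.jpg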
requirements.txt CHANGED
@@ -1,4 +1,4 @@
-gradio==4.14.0
+gradio
 numpy
 Pillow
 opencv-python-headless
@@ -9,4 +9,3 @@ git+https://github.com/serengil/deepface.git
 # Fixes for RetinaFace
 tensorflow==2.12.0
 tf-keras
-spaces