Segizu committed on
Commit
36f95d9
·
2 Parent(s): 9b396e8 a36d980

cache embeddings

Browse files
Files changed (1) hide show
  1. app.py +36 -4
app.py CHANGED
@@ -4,11 +4,27 @@ import gradio as gr
4
  from deepface import DeepFace
5
  from datasets import load_dataset, DownloadConfig
6
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  os.system("rm -rf ~/.cache/huggingface/hub/datasets--Segizu--dataset_faces")
8
 
9
  # ✅ Cargar el dataset de Hugging Face forzando la descarga limpia
10
- download_config = DownloadConfig(force_download=True)
11
- dataset = load_dataset("Segizu/dataset_faces", download_config=download_config)
 
 
 
12
  if "train" in dataset:
13
  dataset = dataset["train"]
14
 
@@ -20,6 +36,13 @@ def preprocess_image(img):
20
 
21
  # 📦 Construir base de datos de embeddings
22
  def build_database():
 
 
 
 
 
 
 
23
  database = []
24
  for i, item in enumerate(dataset):
25
  try:
@@ -31,8 +54,15 @@ def build_database():
31
  enforce_detection=False
32
  )[0]["embedding"]
33
  database.append((f"image_{i}", img, embedding))
 
34
  except Exception as e:
35
  print(f"❌ No se pudo procesar imagen {i}: {e}")
 
 
 
 
 
 
36
  return database
37
 
38
  # 🔍 Buscar rostros similares
@@ -54,7 +84,7 @@ def find_similar_faces(uploaded_image):
54
  similarities.append((sim_score, name, db_img))
55
 
56
  similarities.sort(reverse=True)
57
- top_matches = similarities[:]
58
 
59
  gallery_items = []
60
  text_summary = ""
@@ -66,7 +96,9 @@ def find_similar_faces(uploaded_image):
66
  return gallery_items, text_summary
67
 
68
  # ⚙️ Inicializar base
 
69
  database = build_database()
 
70
 
71
  # 🎛️ Interfaz Gradio
72
  demo = gr.Interface(
@@ -77,7 +109,7 @@ demo = gr.Interface(
77
  gr.Textbox(label="🧠 Similitud", lines=6)
78
  ],
79
  title="🔍 Buscador de Rostros con DeepFace",
80
- description="Sube una imagen y se comparará contra los rostros del dataset alojado en Hugging Face (`Segizu/dataset_faces`)."
81
  )
82
 
83
  demo.launch()
 
4
  from deepface import DeepFace
5
  from datasets import load_dataset, DownloadConfig
6
  import os
7
+ import pickle
8
+ from pathlib import Path
9
+
10
+ # 🔑 Configurar token de Hugging Face
11
+ HF_TOKEN = os.getenv("HF_TOKEN")
12
+ if not HF_TOKEN:
13
+ raise ValueError("⚠️ Por favor, configura la variable de entorno HF_TOKEN para acceder al dataset privado")
14
+
15
+ # 📁 Configurar directorio de caché
16
+ CACHE_DIR = Path("cache")
17
+ CACHE_DIR.mkdir(exist_ok=True)
18
+ EMBEDDINGS_CACHE = CACHE_DIR / "embeddings.pkl"
19
+
20
  os.system("rm -rf ~/.cache/huggingface/hub/datasets--Segizu--dataset_faces")
21
 
22
  # ✅ Cargar el dataset de Hugging Face forzando la descarga limpia
23
+ download_config = DownloadConfig(
24
+ force_download=True,
25
+ token=HF_TOKEN
26
+ )
27
+ dataset = load_dataset("Segizu/facial-recognition", download_config=download_config)
28
  if "train" in dataset:
29
  dataset = dataset["train"]
30
 
 
36
 
37
  # 📦 Construir base de datos de embeddings
38
  def build_database():
39
+ # Intentar cargar embeddings desde caché
40
+ if EMBEDDINGS_CACHE.exists():
41
+ print("📂 Cargando embeddings desde caché...")
42
+ with open(EMBEDDINGS_CACHE, 'rb') as f:
43
+ return pickle.load(f)
44
+
45
+ print("🔄 Calculando embeddings (esto puede tomar unos minutos)...")
46
  database = []
47
  for i, item in enumerate(dataset):
48
  try:
 
54
  enforce_detection=False
55
  )[0]["embedding"]
56
  database.append((f"image_{i}", img, embedding))
57
+ print(f"✅ Procesada imagen {i+1}/{len(dataset)}")
58
  except Exception as e:
59
  print(f"❌ No se pudo procesar imagen {i}: {e}")
60
+
61
+ # Guardar embeddings en caché
62
+ print("💾 Guardando embeddings en caché...")
63
+ with open(EMBEDDINGS_CACHE, 'wb') as f:
64
+ pickle.dump(database, f)
65
+
66
  return database
67
 
68
  # 🔍 Buscar rostros similares
 
84
  similarities.append((sim_score, name, db_img))
85
 
86
  similarities.sort(reverse=True)
87
+ top_matches = similarities[:5]
88
 
89
  gallery_items = []
90
  text_summary = ""
 
96
  return gallery_items, text_summary
97
 
98
  # ⚙️ Inicializar base
99
+ print("🚀 Iniciando aplicación...")
100
  database = build_database()
101
+ print(f"✅ Base de datos cargada con {len(database)} imágenes")
102
 
103
  # 🎛️ Interfaz Gradio
104
  demo = gr.Interface(
 
109
  gr.Textbox(label="🧠 Similitud", lines=6)
110
  ],
111
  title="🔍 Buscador de Rostros con DeepFace",
112
+ description="Sube una imagen y se comparará contra los rostros del dataset alojado en Hugging Face (`Segizu/facial-recognition`)."
113
  )
114
 
115
  demo.launch()