fabiosam committed on
Commit
56da706
verified
1 Parent(s): af4ab3b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -66
app.py CHANGED
@@ -1,44 +1,38 @@
1
  import os
2
- import cv2
3
  import json
 
4
  import numpy as np
5
  import mediapipe as mp
6
  import tensorflow as tf
7
- from tensorflow import keras
8
  import gradio as gr
9
 
10
- # =========================
11
- # CONFIGURACIÓN BÁSICA
12
- # =========================
13
 
14
- MAX_FRAMES = 20 # debe ser el mismo valor que usaste al entrenar
15
- MODEL_DIR = "models"
 
16
 
17
- MODEL_PATH = os.path.join(MODEL_DIR, "sign_model_lstm_v1.keras")
18
- LABELS_PATH = os.path.join(MODEL_DIR, "label_names.json")
19
 
20
- print("TensorFlow version:", tf.__version__)
21
  print("Cargando modelo desde:", MODEL_PATH)
 
22
 
23
- # Carga del modelo LSTM
24
- model = keras.models.load_model(MODEL_PATH)
25
-
26
- # Carga de nombres de clase
27
  with open(LABELS_PATH, "r") as f:
28
  label_names = json.load(f)
29
 
30
  mp_holistic = mp.solutions.holistic
31
 
 
 
 
32
 
33
- # =========================
34
- # EXTRACCIÓN DE LANDMARKS
35
- # =========================
36
 
37
  def extract_landmarks_from_results(results):
38
  """
39
- Convierte los resultados de MediaPipe Holistic en un vector 1D.
40
- Pose (33), mano izq (21), mano der (21) -> 75 puntos.
41
- Cada punto = (x, y, z) => 75 * 3 = 225 features.
42
  """
43
  def get_xyz(landmarks, n_points):
44
  if landmarks is None:
@@ -67,18 +61,17 @@ def extract_landmarks_from_results(results):
67
  return np.array(all_points, dtype=np.float32).flatten() # (225,)
68
 
69
 
70
- # =========================
71
- # PROCESAR VIDEO -> SECUENCIA
72
- # =========================
73
-
74
  def preprocess_video_to_sequence(video_path, max_frames=MAX_FRAMES):
75
  """
76
- Procesa un video (archivo .mp4, .mov, etc.) con MediaPipe Holistic
77
- y devuelve una secuencia (1, max_frames, 225) lista para el modelo.
78
  """
79
- cap = cv2.VideoCapture(video_path)
 
80
 
 
81
  frames_feats = []
 
82
  with mp_holistic.Holistic(
83
  static_image_mode=False,
84
  model_complexity=1,
@@ -96,7 +89,7 @@ def preprocess_video_to_sequence(video_path, max_frames=MAX_FRAMES):
96
  frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
97
  results = holistic.process(frame_rgb)
98
 
99
- vec = extract_landmarks_from_results(results)
100
  frames_feats.append(vec)
101
 
102
  if len(frames_feats) >= max_frames:
@@ -109,7 +102,7 @@ def preprocess_video_to_sequence(video_path, max_frames=MAX_FRAMES):
109
 
110
  seq = np.array(frames_feats, dtype=np.float32)
111
 
112
- # Padding o recorte a max_frames
113
  if seq.shape[0] < max_frames:
114
  pad_len = max_frames - seq.shape[0]
115
  pad = np.zeros((pad_len, seq.shape[1]), dtype=np.float32)
@@ -121,62 +114,51 @@ def preprocess_video_to_sequence(video_path, max_frames=MAX_FRAMES):
121
  return seq
122
 
123
 
124
- # =========================
125
- # FUNCIÓN DE PREDICCIÓN PARA GRADIO
126
- # =========================
127
-
128
- def predict_sign(video):
129
  """
130
- Gradio pasa 'video' como ruta al archivo temporal (.mp4) grabado o subido.
 
 
131
  """
132
- if video is None:
133
  return "Sube o graba un video primero.", {}
134
 
135
  try:
136
- seq = preprocess_video_to_sequence(video, max_frames=MAX_FRAMES)
137
-
138
  probs = model.predict(seq, verbose=0)[0] # (num_classes,)
139
- idx = int(np.argmax(probs))
140
- label = label_names[idx]
141
- conf = float(probs[idx])
142
-
143
- # Para mostrar distribución de probabilidades en Gradio:
144
- probs_dict = {
145
- name: float(probs[i])
146
- for i, name in enumerate(label_names)
147
- }
148
 
149
- result_text = f"Seña predicha: {label} (confianza {conf:.2f})"
150
- return result_text, probs_dict
 
151
 
152
- except Exception as e:
153
- return f"Error procesando el video: {str(e)}", {}
154
 
 
 
155
 
156
- # =========================
157
- # INTERFAZ GRADIO
158
- # =========================
159
 
160
- title = "LSP-EnSeñas - Demo LSTM"
161
- description = """
162
- Sube o graba un video corto haciendo una seña (por ejemplo, uno de los colores
163
- que se usaron en el entrenamiento). El modelo LSTM analiza la secuencia de
164
- landmarks (cuerpo y manos) usando MediaPipe Holistic y predice la clase más probable.
165
- """
166
 
167
  demo = gr.Interface(
168
- fn=predict_sign,
169
  inputs=gr.Video(
170
- source="webcam", # también permite subir archivo
171
- label="Video de la seña (webcam o upload)"
 
 
172
  ),
173
  outputs=[
174
  gr.Textbox(label="Resultado"),
175
  gr.Label(label="Probabilidades por clase")
176
  ],
177
- title=title,
178
- description=description,
179
- allow_flagging="never"
 
 
 
180
  )
181
 
182
  if __name__ == "__main__":
 
1
  import os
 
2
  import json
3
+ import cv2
4
  import numpy as np
5
  import mediapipe as mp
6
  import tensorflow as tf
 
7
  import gradio as gr
8
 
9
+ print("TensorFlow version:", tf.__version__)
 
 
10
 
11
+ # ==== RUTAS DEL MODELO ====
12
+ BASE_DIR = os.path.dirname(__file__)
13
+ MODELS_DIR = os.path.join(BASE_DIR, "models")
14
 
15
+ MODEL_PATH = os.path.join(MODELS_DIR, "sign_model_lstm_v1.keras")
16
+ LABELS_PATH = os.path.join(MODELS_DIR, "label_names.json")
17
 
 
18
  print("Cargando modelo desde:", MODEL_PATH)
19
+ model = tf.keras.models.load_model(MODEL_PATH)
20
 
 
 
 
 
21
  with open(LABELS_PATH, "r") as f:
22
  label_names = json.load(f)
23
 
24
  mp_holistic = mp.solutions.holistic
25
 
26
+ MAX_FRAMES = 20 # mismo valor que usaste al entrenar
27
+ N_FEATURES = 225 # 75 puntos * 3 coords (x, y, z)
28
+
29
 
30
+ # ========= FUNCIONES DE PROCESADO =========
 
 
31
 
32
  def extract_landmarks_from_results(results):
33
  """
34
+ Convierte los resultados de MediaPipe Holistic en un vector 1D (225,)
35
+ con pose (33), mano izq (21) y mano der (21).
 
36
  """
37
  def get_xyz(landmarks, n_points):
38
  if landmarks is None:
 
61
  return np.array(all_points, dtype=np.float32).flatten() # (225,)
62
 
63
 
 
 
 
 
64
  def preprocess_video_to_sequence(video_path, max_frames=MAX_FRAMES):
65
  """
66
+ Lee un video, extrae landmarks por frame y devuelve
67
+ una secuencia (1, max_frames, 225) lista para el LSTM.
68
  """
69
+ if video_path is None:
70
+ raise ValueError("No se recibió ruta de video.")
71
 
72
+ cap = cv2.VideoCapture(video_path)
73
  frames_feats = []
74
+
75
  with mp_holistic.Holistic(
76
  static_image_mode=False,
77
  model_complexity=1,
 
89
  frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
90
  results = holistic.process(frame_rgb)
91
 
92
+ vec = extract_landmarks_from_results(results) # (225,)
93
  frames_feats.append(vec)
94
 
95
  if len(frames_feats) >= max_frames:
 
102
 
103
  seq = np.array(frames_feats, dtype=np.float32)
104
 
105
+ # padding / recorte
106
  if seq.shape[0] < max_frames:
107
  pad_len = max_frames - seq.shape[0]
108
  pad = np.zeros((pad_len, seq.shape[1]), dtype=np.float32)
 
114
  return seq
115
 
116
 
117
+ def predict_video_lstm(video_path):
 
 
 
 
118
  """
119
+ Función que usa Gradio:
120
+ - Recibe la ruta de un video
121
+ - Devuelve la predicción principal + distribución de probabilidades
122
  """
123
+ if video_path is None:
124
  return "Sube o graba un video primero.", {}
125
 
126
  try:
127
+ seq = preprocess_video_to_sequence(video_path, max_frames=MAX_FRAMES)
 
128
  probs = model.predict(seq, verbose=0)[0] # (num_classes,)
129
+ except Exception as e:
130
+ return f"Error procesando el video: {e}", {}
 
 
 
 
 
 
 
131
 
132
+ idx = int(np.argmax(probs))
133
+ label = label_names[idx]
134
+ conf = float(probs[idx])
135
 
136
+ prob_dict = {name: float(probs[i]) for i, name in enumerate(label_names)}
 
137
 
138
+ texto = f"Predicción: {label} (confianza {conf:.2f})"
139
+ return texto, prob_dict
140
 
 
 
 
141
 
142
+ # ========= INTERFAZ GRADIO =========
 
 
 
 
 
143
 
144
  demo = gr.Interface(
145
+ fn=predict_video_lstm,
146
  inputs=gr.Video(
147
+ sources=["upload", "webcam"], # 👈 AQUÍ está la diferencia: sin 'source'
148
+ label="Video de seña (sube o graba)",
149
+ format="mp4",
150
+ type="filepath" # Gradio le pasa a la funci贸n la ruta del archivo
151
  ),
152
  outputs=[
153
  gr.Textbox(label="Resultado"),
154
  gr.Label(label="Probabilidades por clase")
155
  ],
156
+ title="LSP-EnSeñas - Demo LSTM",
157
+ description=(
158
+ "Sube un video corto o grábalo con la webcam haciendo una seña. "
159
+ "El modelo LSTM procesa la secuencia de landmarks (pose + manos) y "
160
+ "muestra la clase más probable y la distribución de probabilidades."
161
+ ),
162
  )
163
 
164
  if __name__ == "__main__":