reels

Running

App Files Files Community

salomonsky committed on Oct 18, 2024

Commit

8190818

verified ·

1 Parent(s): 2fee994

Update inference.py

Browse files

Files changed (1) hide show

inference.py +55 -1

inference.py CHANGED Viewed

@@ -8,10 +8,64 @@ import torch, face_detection
 from models import Wav2Lip
 import platform
 # Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 print('Using {} for inference.'.format(device))
 # Command-line interface; the individual options are registered below.
 parser = argparse.ArgumentParser(description='Inference code to lip-sync videos in the wild using Wav2Lip models')
 parser.add_argument('--checkpoint_path', type=str,

 from models import Wav2Lip
 import platform
def _detection_size(height, width, max_size):
    """Return the ``(width, height)`` a frame is shrunk to before detection.

    Frames whose sides are both at most ``max_size`` keep their size. Larger
    frames are scaled so that the longest side equals ``max_size`` while the
    aspect ratio is preserved. The shorter side is never allowed to collapse
    to zero pixels, which ``cv2.resize`` would reject.
    """
    if height <= max_size and width <= max_size:
        return width, height
    if height > width:
        ratio = max_size / float(height)
        return max(1, int(width * ratio)), max_size
    ratio = max_size / float(width)
    return max_size, max(1, int(height * ratio))


def face_detect(images):
    """Detect one face per frame and return its padded crop and bounding box.

    Detection runs on copies whose longest side is capped at 720 px, which
    keeps the detector's input small. The resulting boxes are mapped back to
    the resolution of the frames that were passed in, so the returned
    coordinates always index the caller's own frames. ``images`` itself is
    never modified.

    Reads the module-level ``args`` (``face_det_batch_size``, ``pads``,
    ``nosmooth``) and ``device``.

    Args:
        images: Sequence of frames as arrays indexed ``[row, column, ...]``.
            Frames in one detection batch are stacked with ``np.array``, so
            they are expected to share a shape.

    Returns:
        A list with one ``[crop, (y1, y2, x1, x2)]`` entry per frame, where
        ``crop`` is ``image[y1:y2, x1:x2]`` taken from the original frame.

    Raises:
        RuntimeError: If detection still fails with a batch size of 1.
        ValueError: If no face is found in some frame; that frame is written
            to ``temp/faulty_frame.jpg`` first.
    """
    if len(images) == 0:
        return []

    detector = face_detection.FaceAlignment(face_detection.LandmarksType._2D,
                                            flip_input=False, device=device)
    batch_size = args.face_det_batch_size
    max_size = 720  # Longest side fed to the detector; use 512 for a smaller cap.

    # Per-frame detector input size as (width, height), the order cv2.resize expects.
    det_sizes = [_detection_size(image.shape[0], image.shape[1], max_size)
                 for image in images]
    if any(size != (image.shape[1], image.shape[0])
           for size, image in zip(det_sizes, images)):
        # Report once instead of once per frame.
        print("Image too big, resizing...")

    while True:
        predictions = []
        try:
            for start in tqdm(range(0, len(images), batch_size)):
                stop = start + batch_size
                # Shrink only the current batch so that no second full-length
                # list of frames has to be kept in memory.
                batch = [
                    image if size == (image.shape[1], image.shape[0])
                    else cv2.resize(image, size)
                    for image, size in zip(images[start:stop], det_sizes[start:stop])
                ]
                predictions.extend(detector.get_detections_for_batch(np.array(batch)))
        except RuntimeError:
            # Treated as an out-of-memory condition: retry with half the batch.
            if batch_size == 1:
                raise RuntimeError('Image too big to run face detection on GPU. Please use the --resize_factor argument')
            batch_size //= 2
            print('Recovering from OOM error; New batch size: {}'.format(batch_size))
            continue
        break

    # The detector is no longer needed once all predictions are collected.
    del detector

    results = []
    pady1, pady2, padx1, padx2 = args.pads
    for rect, image, (det_w, det_h) in zip(predictions, images, det_sizes):
        if rect is None:
            cv2.imwrite('temp/faulty_frame.jpg', image)
            raise ValueError('Face not detected! Ensure the video contains a face in all the frames.')

        height, width = image.shape[:2]
        # Factors that take detector-space pixels back to original pixels
        # (exactly 1.0 when the frame was not shrunk).
        scale_x = width / float(det_w)
        scale_y = height / float(det_h)

        # rect is used as (x1, y1, x2, y2); pad it and clamp to the frame.
        y1 = max(0, int(round(rect[1] * scale_y)) - pady1)
        y2 = min(height, int(round(rect[3] * scale_y)) + pady2)
        x1 = max(0, int(round(rect[0] * scale_x)) - padx1)
        x2 = min(width, int(round(rect[2] * scale_x)) + padx2)
        results.append([x1, y1, x2, y2])

    boxes = np.array(results)
    if not args.nosmooth:
        # get_smoothened_boxes is defined elsewhere in this file; presumably it
        # smooths the boxes over a window of T frames -- confirm there.
        boxes = get_smoothened_boxes(boxes, T=5)

    return [[image[y1:y2, x1:x2], (y1, y2, x1, x2)]
            for image, (x1, y1, x2, y2) in zip(images, boxes)]
 # Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 print('Using {} for inference.'.format(device))
 # Command-line interface; the individual options are registered below.
 parser = argparse.ArgumentParser(description='Inference code to lip-sync videos in the wild using Wav2Lip models')
 parser.add_argument('--checkpoint_path', type=str,