Spaces:

victorisgeek
/

VideoReface

Running

App Files Files Community

victorisgeek committed on May 29

Commit

872c724

•

1 Parent(s): 4b0bc65

Upload 6 files

Browse files

Files changed (6) hide show

app.py +93 -0
refacer.py +262 -0
requirements-COREML.txt +12 -0
requirements-GPU.txt +12 -0
requirements.txt +12 -0
script.py +41 -0

app.py ADDED Viewed

	@@ -0,0 +1,93 @@

+import gradio as gr
+from refacer import Refacer
+import argparse
+import ngrok
+parser = argparse.ArgumentParser(description='Refacer')
+parser.add_argument("--max_num_faces", type=int, help="Max number of faces on UI", default=5)
+parser.add_argument("--force_cpu", help="Force CPU mode", default=False, action="store_true")
+parser.add_argument("--share_gradio", help="Share Gradio", default=False, action="store_true")
+parser.add_argument("--server_name", type=str, help="Server IP address", default="127.0.0.1")
+parser.add_argument("--server_port", type=int, help="Server port", default=7860)
+parser.add_argument("--colab_performance", help="Use in colab for better performance", default=False,action="store_true")
+parser.add_argument("--ngrok", type=str, help="Use ngrok", default=None)
+parser.add_argument("--ngrok_region", type=str, help="ngrok region", default="us")
+args = parser.parse_args()
+refacer = Refacer(force_cpu=args.force_cpu,colab_performance=args.colab_performance)
+num_faces=args.max_num_faces
+# Connect to ngrok for ingress
+def connect(token, port, options):
+    account = None
+    if token is None:
+        token = 'None'
+    else:
+        if ':' in token:
+            # token = authtoken:username:password
+            token, username, password = token.split(':', 2)
+            account = f"{username}:{password}"
+    # For all options see: https://github.com/ngrok/ngrok-py/blob/main/examples/ngrok-connect-full.py
+    if not options.get('authtoken_from_env'):
+        options['authtoken'] = token
+    if account:
+        options['basic_auth'] = account
+    try:
+        public_url = ngrok.connect(f"127.0.0.1:{port}", **options).url()
+    except Exception as e:
+        print(f'Invalid ngrok authtoken? ngrok connection aborted due to: {e}\n'
+              f'Your token: {token}, get the right one on https://dashboard.ngrok.com/get-started/your-authtoken')
+    else:
+        print(f'ngrok connected to localhost:{port}! URL: {public_url}\n'
+               'You can use this link after the launch is complete.')
+def run(*vars):
+    video_path=vars[0]
+    origins=vars[1:(num_faces+1)]
+    destinations=vars[(num_faces+1):(num_faces*2)+1]
+    thresholds=vars[(num_faces*2)+1:]
+    faces = []
+    for k in range(0,num_faces):
+        if origins[k] is not None and destinations[k] is not None:
+            faces.append({
+                'origin':origins[k],
+                'destination':destinations[k],
+                'threshold':thresholds[k]
+            })
+    return refacer.reface(video_path,faces)
+# Per-face UI components, collected in tab order so they can be passed as
+# inputs to the "Reface" button below.
+origin = []
+destination = []
+thresholds = []
+# Build the Gradio layout: title row, source/result video row, one tab per
+# face slot, and the button that triggers run().
+with gr.Blocks() as demo:
+    with gr.Row():
+        gr.Markdown("# Refacer")
+    with gr.Row():
+        video=gr.Video(label="Original video",format="mp4")
+        # Output-only component that receives the value returned by run().
+        video2=gr.Video(label="Refaced video",interactive=False,format="mp4")
+    # One tab per face slot; the number of tabs comes from --max_num_faces.
+    for i in range(0,num_faces):
+        with gr.Tab(f"Face #{i+1}"):
+            with gr.Row():
+                origin.append(gr.Image(label="Face to replace"))
+                destination.append(gr.Image(label="Destination face"))
+            with gr.Row():
+                thresholds.append(gr.Slider(label="Threshold",minimum=0.0,maximum=1.0,value=0.2))
+    with gr.Row():
+        button=gr.Button("Reface", variant="primary")
+    # Input order matters: run() slices its positional arguments as
+    # video, origins, destinations, thresholds.
+    button.click(fn=run,inputs=[video]+origin+destination+thresholds,outputs=[video2])
+# Optionally open an ngrok tunnel to the server port before launching.
+if args.ngrok is not None:
+    connect(args.ngrok, args.server_port, {'region': args.ngrok_region, 'authtoken_from_env': False})
+#demo.launch(share=True,server_name="0.0.0.0", show_error=True)
+# Launch with the request queue enabled, using the CLI-provided host, port and share flag.
+demo.queue().launch(show_error=True,share=args.share_gradio,server_name=args.server_name,server_port=args.server_port)

refacer.py ADDED Viewed

	@@ -0,0 +1,262 @@

+import cv2
+import onnxruntime as rt
+import sys
+from insightface.app import FaceAnalysis
+sys.path.insert(1, './recognition')
+from scrfd import SCRFD
+from arcface_onnx import ArcFaceONNX
+import os.path as osp
+import os
+from pathlib import Path
+from tqdm import tqdm
+import ffmpeg
+import random
+import multiprocessing as mp
+from concurrent.futures import ThreadPoolExecutor
+from insightface.model_zoo.inswapper import INSwapper
+import psutil
+from enum import Enum
+from insightface.app.common import Face
+from insightface.utils.storage import ensure_available
+import re
+import subprocess
+class RefacerMode(Enum):
+     CPU, CUDA, COREML, TENSORRT = range(1, 5)
+class Refacer:
+    def __init__(self,force_cpu=False,colab_performance=False):
+        self.first_face = False
+        self.force_cpu = force_cpu
+        self.colab_performance = colab_performance
+        self.__check_encoders()
+        self.__check_providers()
+        self.total_mem = psutil.virtual_memory().total
+        self.__init_apps()
+    def __check_providers(self):
+        if self.force_cpu :
+            self.providers = ['CPUExecutionProvider']
+        else:
+            self.providers = rt.get_available_providers()
+        rt.set_default_logger_severity(4)
+        self.sess_options = rt.SessionOptions()
+        self.sess_options.execution_mode = rt.ExecutionMode.ORT_SEQUENTIAL
+        self.sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL
+        if len(self.providers) == 1 and 'CPUExecutionProvider' in self.providers:
+            self.mode = RefacerMode.CPU
+            self.use_num_cpus = mp.cpu_count()-1
+            self.sess_options.intra_op_num_threads = int(self.use_num_cpus/3)
+            print(f"CPU mode with providers {self.providers}")
+        elif self.colab_performance:
+            self.mode = RefacerMode.TENSORRT
+            self.use_num_cpus = mp.cpu_count()-1
+            self.sess_options.intra_op_num_threads = int(self.use_num_cpus/3)
+            print(f"TENSORRT mode with providers {self.providers}")
+        elif 'CoreMLExecutionProvider' in self.providers:
+            self.mode = RefacerMode.COREML
+            self.use_num_cpus = mp.cpu_count()-1
+            self.sess_options.intra_op_num_threads = int(self.use_num_cpus/3)
+            print(f"CoreML mode with providers {self.providers}")
+        elif 'CUDAExecutionProvider' in self.providers:
+            self.mode = RefacerMode.CUDA
+            self.use_num_cpus = 2
+            self.sess_options.intra_op_num_threads = 1
+            if 'TensorrtExecutionProvider' in self.providers:
+                self.providers.remove('TensorrtExecutionProvider')
+            print(f"CUDA mode with providers {self.providers}")
+        """
+        elif 'TensorrtExecutionProvider' in self.providers:
+            self.mode = RefacerMode.TENSORRT
+            #self.use_num_cpus = 1
+            #self.sess_options.intra_op_num_threads = 1
+            self.use_num_cpus = mp.cpu_count()-1
+            self.sess_options.intra_op_num_threads = int(self.use_num_cpus/3)
+            print(f"TENSORRT mode with providers {self.providers}")
+        """
+    def __init_apps(self):
+        assets_dir = ensure_available('models', 'buffalo_l', root='~/.insightface')
+        model_path = os.path.join(assets_dir, 'det_10g.onnx')
+        sess_face = rt.InferenceSession(model_path, self.sess_options, providers=self.providers)
+        self.face_detector = SCRFD(model_path,sess_face)
+        self.face_detector.prepare(0,input_size=(640, 640))
+        model_path = os.path.join(assets_dir , 'w600k_r50.onnx')
+        sess_rec = rt.InferenceSession(model_path, self.sess_options, providers=self.providers)
+        self.rec_app = ArcFaceONNX(model_path,sess_rec)
+        self.rec_app.prepare(0)
+        model_path = 'inswapper_128.onnx'
+        sess_swap = rt.InferenceSession(model_path, self.sess_options, providers=self.providers)
+        self.face_swapper = INSwapper(model_path,sess_swap)
+    def prepare_faces(self, faces):
+        self.replacement_faces=[]
+        for face in faces:
+            #image1 = cv2.imread(face.origin)
+            if "origin" in face:
+                face_threshold = face['threshold']
+                bboxes1, kpss1 = self.face_detector.autodetect(face['origin'], max_num=1)
+                if len(kpss1)<1:
+                    raise Exception('No face detected on "Face to replace" image')
+                feat_original = self.rec_app.get(face['origin'], kpss1[0])
+            else:
+                face_threshold = 0
+                self.first_face = True
+                feat_original = None
+                print('No origin image: First face change')
+            #image2 = cv2.imread(face.destination)
+            _faces = self.__get_faces(face['destination'],max_num=1)
+            if len(_faces)<1:
+                raise Exception('No face detected on "Destination face" image')
+            self.replacement_faces.append((feat_original,_faces[0],face_threshold))
+    def __convert_video(self,video_path,output_video_path):
+        if self.video_has_audio:
+            print("Merging audio with the refaced video...")
+            new_path = output_video_path + str(random.randint(0,999)) + "_c.mp4"
+            #stream = ffmpeg.input(output_video_path)
+            in1 = ffmpeg.input(output_video_path)
+            in2 = ffmpeg.input(video_path)
+            out = ffmpeg.output(in1.video, in2.audio, new_path,video_bitrate=self.ffmpeg_video_bitrate,vcodec=self.ffmpeg_video_encoder)
+            out.run(overwrite_output=True,quiet=True)
+        else:
+            new_path = output_video_path
+            print("The video doesn't have audio, so post-processing is not necessary")
+        print(f"The process has finished.\nThe refaced video can be found at {os.path.abspath(new_path)}")
+        return new_path
+    def __get_faces(self,frame,max_num=0):
+        bboxes, kpss = self.face_detector.detect(frame,max_num=max_num,metric='default')
+        if bboxes.shape[0] == 0:
+            return []
+        ret = []
+        for i in range(bboxes.shape[0]):
+            bbox = bboxes[i, 0:4]
+            det_score = bboxes[i, 4]
+            kps = None
+            if kpss is not None:
+                kps = kpss[i]
+            face = Face(bbox=bbox, kps=kps, det_score=det_score)
+            face.embedding = self.rec_app.get(frame, kps)
+            ret.append(face)
+        return ret
+    def process_first_face(self,frame):
+        faces = self.__get_faces(frame,max_num=1)
+        if len(faces) != 0:
+            frame = self.face_swapper.get(frame, faces[0], self.replacement_faces[0][1], paste_back=True)
+        return frame
+    def process_faces(self,frame):
+        faces = self.__get_faces(frame,max_num=0)
+        for rep_face in self.replacement_faces:
+            for i in range(len(faces) - 1, -1, -1):
+                sim = self.rec_app.compute_sim(rep_face[0], faces[i].embedding)
+                if sim>=rep_face[2]:
+                    frame = self.face_swapper.get(frame, faces[i], rep_face[1], paste_back=True)
+                    del faces[i]
+                    break
+        return frame
+    def __check_video_has_audio(self,video_path):
+        self.video_has_audio = False
+        probe = ffmpeg.probe(video_path)
+        audio_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'audio'), None)
+        if audio_stream is not None:
+            self.video_has_audio = True
+    def reface_group(self, faces, frames, output):
+        with ThreadPoolExecutor(max_workers = self.use_num_cpus) as executor:
+            if self.first_face:
+                results = list(tqdm(executor.map(self.process_first_face, frames), total=len(frames),desc="Processing frames"))
+            else:
+                results = list(tqdm(executor.map(self.process_faces, frames), total=len(frames),desc="Processing frames"))
+            for result in results:
+                output.write(result)
+    def reface(self, video_path, faces):
+        self.__check_video_has_audio(video_path)
+        output_video_path = os.path.join('out',Path(video_path).name)
+        self.prepare_faces(faces)
+        cap = cv2.VideoCapture(video_path)
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        print(f"Total frames: {total_frames}")
+        fps = cap.get(cv2.CAP_PROP_FPS)
+        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
+        output = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
+        frames=[]
+        self.k = 1
+        with tqdm(total=total_frames,desc="Extracting frames") as pbar:
+            while cap.isOpened():
+                flag, frame = cap.read()
+                if flag and len(frame)>0:
+                    frames.append(frame.copy())
+                    pbar.update()
+                else:
+                    break
+                if (len(frames) > 1000):
+                    self.reface_group(faces,frames,output)
+                    frames=[]
+            cap.release()
+            pbar.close()
+        self.reface_group(faces,frames,output)
+        frames=[]
+        output.release()
+        return self.__convert_video(video_path,output_video_path)
+    def __try_ffmpeg_encoder(self, vcodec):
+        print(f"Trying FFMPEG {vcodec} encoder")
+        command = ['ffmpeg', '-y', '-f','lavfi','-i','testsrc=duration=1:size=1280x720:rate=30','-vcodec',vcodec,'testsrc.mp4']
+        try:
+            subprocess.run(command, check=True, capture_output=True).stderr
+        except subprocess.CalledProcessError as e:
+            print(f"FFMPEG {vcodec} encoder doesn't work -> Disabled.")
+            return False
+        print(f"FFMPEG {vcodec} encoder works")
+        return True
+    def __check_encoders(self):
+        self.ffmpeg_video_encoder='libx264'
+        self.ffmpeg_video_bitrate='0'
+        pattern = r"encoders: ([a-zA-Z0-9_]+(?: [a-zA-Z0-9_]+)*)"
+        command = ['ffmpeg', '-codecs', '--list-encoders']
+        commandout = subprocess.run(command, check=True, capture_output=True).stdout
+        result = commandout.decode('utf-8').split('\n')
+        for r in result:
+            if "264" in r:
+                encoders = re.search(pattern, r).group(1).split(' ')
+                for v_c in Refacer.VIDEO_CODECS:
+                    for v_k in encoders:
+                        if v_c == v_k:
+                            if self.__try_ffmpeg_encoder(v_k):
+                                self.ffmpeg_video_encoder=v_k
+                                self.ffmpeg_video_bitrate=Refacer.VIDEO_CODECS[v_k]
+                                print(f"Video codec for FFMPEG: {self.ffmpeg_video_encoder}")
+                                return
+    VIDEO_CODECS = {
+         'h264_videotoolbox':'0', #osx HW acceleration
+         'h264_nvenc':'0', #NVIDIA HW acceleration
+         #'h264_qsv', #Intel HW acceleration
+         #'h264_vaapi', #Intel HW acceleration
+         #'h264_omx', #HW acceleration
+         'libx264':'0' #No HW acceleration
+    }

requirements-COREML.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+ffmpeg_python==0.2.0
+gradio==3.33.1
+insightface==0.7.3
+numpy==1.24.3
+onnx==1.14.0
+onnxruntime-silicon
+opencv_python==4.7.0.72
+opencv_python_headless==4.7.0.72
+scikit-image==0.20.0
+tqdm
+psutil
+ngrok

requirements-GPU.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+ffmpeg_python==0.2.0
+gradio==3.33.1
+insightface==0.7.3
+numpy==1.24.3
+onnx==1.14.0
+onnxruntime_gpu==1.15.0
+opencv_python==4.7.0.72
+opencv_python_headless==4.7.0.72
+scikit-image==0.20.0
+tqdm
+psutil
+ngrok

requirements.txt ADDED Viewed

	@@ -0,0 +1,12 @@

+ffmpeg_python==0.2.0
+gradio==3.33.1
+insightface==0.7.3
+numpy==1.24.3
+onnx==1.14.0
+onnxruntime==1.15.0
+opencv_python==4.7.0.72
+opencv_python_headless==4.7.0.72
+scikit-image==0.20.0
+tqdm
+psutil
+ngrok

script.py ADDED Viewed

	@@ -0,0 +1,41 @@

+from refacer import Refacer
+from os.path import exists
+import argparse
+import cv2
+parser = argparse.ArgumentParser(description='Refacer')
+parser.add_argument("--force_cpu", help="Force CPU mode", default=False, action="store_true")
+parser.add_argument("--colab_performance", help="Use in colab for better performance", default=False,action="store_true")
+parser.add_argument("--face", help="Face to replace (ex: <src>,<dst>,<thresh=0.2>)", nargs='+', action="append", required=True)
+parser.add_argument("--video", help="Video to parse", required=True)
+args = parser.parse_args()
+refacer = Refacer(force_cpu=args.force_cpu,colab_performance=args.colab_performance)
+def run(video_path,faces):
+    video_path_exists = exists(video_path)
+    if video_path_exists == False:
+        print ("Can't find " + video_path)
+        return
+    faces_out = []
+    for face in faces:
+        face_str = face[0].split(",")
+        origin = exists(face_str[0])
+        if origin == False:
+            print ("Can't find " + face_str[0])
+            return
+        destination = exists(face_str[1])
+        if destination == False:
+            print ("Can't find " + face_str[1])
+            return
+        faces_out.append({
+                'origin':cv2.imread(face_str[0]),
+                'destination':cv2.imread(face_str[1]),
+                'threshold':float(face_str[2])
+            })
+    return refacer.reface(video_path,faces_out)
+run(args.video, args.face)