Spaces:

PraneshJs
/

fakevideodetect

Running

App Files Files Community

PraneshJs committed on Aug 13

Commit

996bc38

verified ·

1 Parent(s): 6d0c2aa

Update inference_2.py

Browse files

Files changed (1) hide show

inference_2.py +48 -68

inference_2.py CHANGED Viewed

@@ -1,21 +1,15 @@
 import os
 import cv2
-import onnx
 import torch
 import numpy as np
-import argparse
-from models.TMC import ETMC
-from models import image
 from onnx2pytorch import ConvertModel
-import types
-# Load ONNX model and convert to PyTorch
-onnx_model = onnx.load('checkpoints/efficientnet.onnx')
-pytorch_model = ConvertModel(onnx_model)
 torch.manual_seed(42)
-# Audio model parameters
 audio_args = {
     'nb_samp': 64600,
     'first_conv': 1024,
@@ -25,50 +19,45 @@ audio_args = {
     'nb_fc_node': 1024,
     'gru_node': 1024,
     'nb_gru_layer': 3,
-    'nb_classes': 2
 }
-# Create a complete args object for RawNet
-audio_args_complete = {
-    **audio_args,
-    'pretrained_audio_encoder': False,
-    'freeze_audio_encoder': False,
-    'device': 'cpu'
-}
-audio_args_obj = types.SimpleNamespace(**audio_args_complete)
-# Load models
 spec_model = image.RawNet(audio_args_obj)
-spec_model_ckpt = torch.load('checkpoints/model.pth', map_location='cpu')
-spec_model.load_state_dict(spec_model_ckpt['spec_encoder'], strict=True)
-spec_model.eval()
-img_model = pytorch_model
-img_model_ckpt = torch.load('checkpoints/model.pth', map_location='cpu')
-img_model.load_state_dict(img_model_ckpt['rgb_encoder'], strict=True)
 img_model.eval()
 # Preprocessing functions
 def preprocess_img(face):
     face = face / 255.0
     face = cv2.resize(face, (256, 256))
-    face_pt = torch.unsqueeze(torch.Tensor(face), dim=0)
-    return face_pt
 def preprocess_audio(audio_file):
-    audio_pt = torch.unsqueeze(torch.Tensor(audio_file), dim=0)
-    return audio_pt
 def preprocess_video(input_video, n_frames=3):
     v_cap = cv2.VideoCapture(input_video)
     v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
     sample = np.linspace(0, v_len - 1, n_frames).astype(int)
     frames = []
     for j in range(v_len):
         success = v_cap.grab()
         if j in sample:
@@ -81,52 +70,43 @@ def preprocess_video(input_video, n_frames=3):
     v_cap.release()
     return frames
 # Prediction functions
 def deepfakes_spec_predict(input_audio):
-    x, _ = input_audio
-    audio = preprocess_audio(x)
-    spec_grads = spec_model(audio)
-    spec_grads_np = np.squeeze(spec_grads.detach().cpu().numpy())
     if spec_grads_np[0] > 0.5:
-        text2 = f"The audio is REAL."
     else:
-        text2 = f"The audio is FAKE."
-    return text2
 def deepfakes_image_predict(input_image):
-    face = preprocess_img(input_image)
-    img_grads = img_model(face)
-    img_grads_np = np.squeeze(img_grads.detach().cpu().numpy())
     if img_grads_np[0] > 0.5:
-        preds = round(img_grads_np[0] * 100, 3)
-        text2 = f"The image is REAL. \nConfidence score: {preds}%"
     else:
-        preds = round(img_grads_np[1] * 100, 3)
-        text2 = f"The image is FAKE. \nConfidence score: {preds}%"
-    return text2
 def deepfakes_video_predict(input_video):
-    video_frames = preprocess_video(input_video)
-    real_faces_list, fake_faces_list = [], []
-    for face in video_frames:
-        img_grads = img_model(face)
-        img_grads_np = np.squeeze(img_grads.detach().cpu().numpy())
-        real_faces_list.append(img_grads_np[0])
-        fake_faces_list.append(img_grads_np[1])
-    real_faces_mean = np.mean(real_faces_list)
-    fake_faces_mean = np.mean(fake_faces_list)
-    if real_faces_mean > 0.5:
-        preds = round(real_faces_mean * 100, 3)
-        text2 = f"The video is REAL. \nConfidence score: {preds}%"
     else:
-        preds = round(fake_faces_mean * 100, 3)
-        text2 = f"The video is FAKE. \nConfidence score: {preds}%"
-    return text2

 import os
 import cv2
 import torch
 import numpy as np
+from onnx import load as onnx_load
 from onnx2pytorch import ConvertModel
+from models import image  # Your RawNet audio model
+# Set seed for reproducibility
 torch.manual_seed(42)
+# Audio args for RawNet
 audio_args = {
     'nb_samp': 64600,
     'first_conv': 1024,
     'nb_fc_node': 1024,
     'gru_node': 1024,
     'nb_gru_layer': 3,
+    'nb_classes': 2,
+    'device': 'cpu',
+    'pretrained_audio_encoder': False
 }
+# Wrap audio_args in a SimpleNamespace so its keys can be read as attributes of the object handed to RawNet
+from types import SimpleNamespace
+audio_args_obj = SimpleNamespace(**audio_args)
+# Image model: load checkpoints/efficientnet.onnx and convert it with onnx2pytorch's ConvertModel. NOTE(review): the removed code also loaded the 'rgb_encoder' state dict from checkpoints/model.pth onto this model; this revision does not - confirm that is intended
+onnx_model = onnx_load("checkpoints/efficientnet.onnx")
+img_model = ConvertModel(onnx_model)  # NOTE(review): strict=True belonged to the removed load_state_dict call, not to ConvertModel
+# Audio model: build RawNet from audio_args_obj. NOTE(review): no load_state_dict call for spec_model is visible in this revision (the removed code loaded 'spec_encoder' from checkpoints/model.pth) - confirm it is not running with freshly initialised weights
 spec_model = image.RawNet(audio_args_obj)
+# Put both models in eval mode before any prediction function runs
 img_model.eval()
+spec_model.eval()
+# -------------------------
 # Preprocessing functions
+# -------------------------
 def preprocess_img(face):
     """Turn an image array into a batched tensor for the image model.

     The pixel values are divided by 255, the result is resized to
     256x256 with ``cv2.resize``, and a leading batch axis is added.

     Args:
         face: Image array accepted by ``cv2.resize``.
             NOTE(review): presumably an HxWxC array with values in 0-255 -
             confirm against the caller.

     Returns:
         A tensor built from the resized image with one extra dimension
         inserted at index 0.
     """
     resized = cv2.resize(face / 255.0, (256, 256))
     return torch.Tensor(resized).unsqueeze(0)
 def preprocess_audio(audio_file):
     """Convert audio sample data into a float32 tensor with a batch axis.

     Args:
         audio_file: Sample data accepted by ``torch.as_tensor`` (for example
             a list, a NumPy array or a tensor). The value is used as data;
             it is not opened as a file path.

     Returns:
         A ``torch.float32`` tensor with one extra dimension inserted at
         index 0.
     """
     # torch.as_tensor always treats its argument as data. The legacy
     # torch.Tensor(...) constructor reads a bare int as a size and returns
     # uninitialised memory, so it is avoided here.
     samples = torch.as_tensor(audio_file, dtype=torch.float32)
     return samples.unsqueeze(0)
 def preprocess_video(input_video, n_frames=3):
     v_cap = cv2.VideoCapture(input_video)
     v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
     sample = np.linspace(0, v_len - 1, n_frames).astype(int)
     frames = []
     for j in range(v_len):
         success = v_cap.grab()
         if j in sample:
     v_cap.release()
     return frames
+# -------------------------
 # Prediction functions
+# -------------------------
 def deepfakes_spec_predict(input_audio):
     """Classify an audio clip as real or fake with the RawNet audio model.

     Args:
         input_audio: Audio sample data, forwarded unchanged to
             ``preprocess_audio``.
             NOTE(review): the previous revision unpacked
             ``x, _ = input_audio`` first - confirm callers now pass the bare
             sample data rather than a 2-tuple.

     Returns:
         ``"The audio is REAL."`` when the first model output exceeds 0.5,
         otherwise ``"The audio is FAKE."``.
     """
     audio_tensor = preprocess_audio(input_audio)
     # Inference only: skip autograd bookkeeping, and call the module itself
     # (not .forward) so that any registered hooks still run.
     with torch.no_grad():
         spec_grads = spec_model(audio_tensor)
     spec_grads_np = np.squeeze(spec_grads.cpu().detach().numpy())
     if spec_grads_np[0] > 0.5:
         return "The audio is REAL."
     return "The audio is FAKE."
 def deepfakes_image_predict(input_image):
     """Classify a single image as real or fake with the image model.

     Args:
         input_image: Image array, forwarded to ``preprocess_img``.

     Returns:
         A message naming the verdict and a confidence percentage rounded to
         two decimals. The verdict is REAL when the first model output
         exceeds 0.5 (confidence taken from output 0), otherwise FAKE
         (confidence taken from output 1).
     """
     face_tensor = preprocess_img(input_image)
     # Inference only: skip autograd bookkeeping, and call the module itself
     # (not .forward) so that any registered hooks still run.
     with torch.no_grad():
         img_grads = img_model(face_tensor)
     img_grads_np = np.squeeze(img_grads.cpu().detach().numpy())
     if img_grads_np[0] > 0.5:
         return f"The image is REAL. Confidence score: {round(img_grads_np[0]*100,2)}%"
     return f"The image is FAKE. Confidence score: {round(img_grads_np[1]*100,2)}%"
 def deepfakes_video_predict(input_video):
     """Classify a video as real or fake from a few sampled frames.

     Each frame returned by ``preprocess_video`` is scored by the image
     model; the per-frame outputs 0 (real) and 1 (fake) are averaged and the
     verdict is REAL when the mean of output 0 exceeds 0.5.

     Args:
         input_video: Video source, forwarded to ``preprocess_video``.

     Returns:
         A message naming the verdict and a confidence percentage rounded to
         two decimals, or an explanatory message when no frames were
         obtained.
     """
     frames = preprocess_video(input_video)
     if not frames:
         # Without this guard np.mean([]) yields NaN and the function would
         # report a misleading "FAKE ... nan%" verdict.
         return "Unable to analyse the video: no frames could be read."

     # NOTE(review): frames are fed to the model exactly as returned by
     # preprocess_video - presumably already batched tensors; confirm.
     # Inference only: skip autograd bookkeeping, and call the module itself
     # (not .forward) so that any registered hooks still run.
     with torch.no_grad():
         scores = [
             np.squeeze(img_model(frame).cpu().detach().numpy())
             for frame in frames
         ]

     real_mean = np.mean([score[0] for score in scores])
     fake_mean = np.mean([score[1] for score in scores])
     if real_mean > 0.5:
         return f"The video is REAL. Confidence: {round(real_mean*100,2)}%"
     return f"The video is FAKE. Confidence: {round(fake_mean*100,2)}%"