Spaces:

amphion
/

DeepfakeDetection

Running on Zero

App Files Community

wli3221134 committed on Jan 5

Commit

0aed679

verified ·

1 Parent(s): 24fa17c

Upload 8 files

Browse files

Files changed (2) hide show

app.py +44 -12
env.sh +26 -0

app.py CHANGED Viewed

@@ -27,46 +27,78 @@ checkpoint_path = load_model()
 @spaces.GPU
 def detect_on_gpu(dataset):
     """在 GPU 上进行音频伪造检测"""
-    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    device = 'cpu'
     model = Wav2Vec2BERT_Llama().to(device)
-    # 加载模型权重
     checkpoint = torch.load(checkpoint_path, map_location=device)
     model_state_dict = checkpoint['model_state_dict']
     threshold = 0.9996
     # 处理模型状态字典的 key
     if hasattr(model, 'module') and not any(key.startswith('module.') for key in model_state_dict.keys()):
         model_state_dict = {'module.' + key: value for key, value in model_state_dict.items()}
     elif not hasattr(model, 'module') and any(key.startswith('module.') for key in model_state_dict.keys()):
         model_state_dict = {key.replace('module.', ''): value for key, value in model_state_dict.items()}
     model.load_state_dict(model_state_dict)
     model.eval()
     with torch.no_grad():
-        for batch in dataset:
             main_features = {
                 'input_features': batch['main_features']['input_features'].to(device),
                 'attention_mask': batch['main_features']['attention_mask'].to(device)
             }
-            prompt_features = [{
-                'input_features': pf['input_features'].to(device),
-                'attention_mask': pf['attention_mask'].to(device)
-            } for pf in batch['prompt_features']]
-            prompt_labels = batch['prompt_labels'].to(device)
             outputs = model({
                 'main_features': main_features,
                 'prompt_features': prompt_features,
                 'prompt_labels': prompt_labels
             })
             avg_scores = outputs['avg_logits'].softmax(dim=-1)
             deepfake_scores = avg_scores[:, 1].cpu()
-            is_fake = deepfake_scores[0] > threshold
-            result = {"is_fake": is_fake, "confidence": deepfake_scores[0]}
     return result
 # 修改音频伪造检测主函数
@@ -84,7 +116,7 @@ def audio_deepfake_detection(demonstrations, query_audio_path):
     return {
         "Is AI Generated": result["is_fake"],
-        "Confidence": f"{result['confidence']:.2f}%"
     }
 # Gradio 界面

 @spaces.GPU
 def detect_on_gpu(dataset):
     """在 GPU 上进行音频伪造检测"""
+    print("\n=== 开始音频检测 ===")
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"使用设备: {device}")
+    print("正在初始化模型...")
     model = Wav2Vec2BERT_Llama().to(device)
+    print(f"正在加载模型权重: {checkpoint_path}")
     checkpoint = torch.load(checkpoint_path, map_location=device)
     model_state_dict = checkpoint['model_state_dict']
     threshold = 0.9996
+    print(f"检测阈值设置为: {threshold}")
     # 处理模型状态字典的 key
     if hasattr(model, 'module') and not any(key.startswith('module.') for key in model_state_dict.keys()):
+        print("添加 'module.' 前缀到状态字典的 key")
         model_state_dict = {'module.' + key: value for key, value in model_state_dict.items()}
     elif not hasattr(model, 'module') and any(key.startswith('module.') for key in model_state_dict.keys()):
+        print("移除状态字典 key 中的 'module.' 前缀")
         model_state_dict = {key.replace('module.', ''): value for key, value in model_state_dict.items()}
     model.load_state_dict(model_state_dict)
     model.eval()
+    print("模型加载完成，进入评估模式")
+    print("\n开始处理音频数据...")
     with torch.no_grad():
+        for batch_idx, batch in enumerate(dataset):
+            print(f"\n处理批次 {batch_idx + 1}")
+            print("准备主特征...")
             main_features = {
                 'input_features': batch['main_features']['input_features'].to(device),
                 'attention_mask': batch['main_features']['attention_mask'].to(device)
             }
+            print(f"主特征形状: {main_features['input_features'].shape}")
+            if len(batch['prompt_features']) > 0:
+                print("\n准备提示特征...")
+                prompt_features = [{
+                    'input_features': pf['input_features'].to(device),
+                    'attention_mask': pf['attention_mask'].to(device)
+                } for pf in batch['prompt_features']]
+                print(f"提示特征数量: {len(prompt_features)}")
+                print(f"第一个提示特征形状: {prompt_features[0]['input_features'].shape}")
+                print("\n准备提示标签...")
+                prompt_labels = batch['prompt_labels'].to(device)
+                print(f"提示标签形状: {prompt_labels.shape}")
+                print(f"提示标签值: {prompt_labels}")
+            else:
+                prompt_features = []
+                prompt_labels = []
+            print("\n执行模型推理...")
             outputs = model({
                 'main_features': main_features,
                 'prompt_features': prompt_features,
                 'prompt_labels': prompt_labels
             })
+            print("\n处理模型输出...")
             avg_scores = outputs['avg_logits'].softmax(dim=-1)
             deepfake_scores = avg_scores[:, 1].cpu()
+            is_fake = deepfake_scores[0].item() > threshold
+            result = {"is_fake": is_fake, "confidence": deepfake_scores[0] if is_fake else 1-deepfake_scores[0]}
+            break
+    print("\n=== 检测完成 ===")
     return result
 # 修改音频伪造检测主函数
     return {
         "Is AI Generated": result["is_fake"],
+        "Confidence": f"{100*result['confidence']:.2f}%"
     }
 # Gradio 界面

env.sh ADDED Viewed

	@@ -0,0 +1,26 @@

+# Copyright (c) 2023 Amphion.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+# Exit immediately if any command fails
+set -e
+# Install ffmpeg in Linux
+conda install -c conda-forge ffmpeg
+# Pip packages
+pip install setuptools ruamel.yaml tqdm colorama easydict tabulate loguru json5 Cython unidecode inflect argparse g2p_en tgt librosa==0.9.1 matplotlib typeguard einops omegaconf hydra-core humanfriendly pandas munch
+pip install tensorboard tensorboardX torch==2.0.1 torchaudio==2.0.2 torchvision==0.15.2 accelerate==0.24.1 transformers==4.41.2 diffusers praat-parselmouth audiomentations pedalboard ffmpeg-python==0.2.0 pyworld diffsptk==1.0.1 nnAudio unidecode inflect ptwt
+pip install https://github.com/vBaiCai/python-pesq/archive/master.zip
+pip install fairseq
+pip install git+https://github.com/lhotse-speech/lhotse
+pip install black==24.1.1
+# Uninstall nvidia-cublas-cu11 if you encounter errors related to the CUDA version
+# pip uninstall nvidia-cublas-cu11