Kabatubare committed on
Commit
24baf79
1 Parent(s): ac57bd2

Update app.py

Files changed (1)
  1. app.py +42 -16
app.py CHANGED
@@ -1,27 +1,53 @@
  import gradio as gr
- from transformers import pipeline

- # Initialize the pipeline for audio classification
- # Ensure you have the transformers library installed
- model_pipeline = pipeline("audio-classification", model="Kabatubare/ast_celeb_spoof")

  def predict_voice(audio_file):
-     predictions = model_pipeline(audio_file.name)
-     # Format the predictions for display
-     formatted_predictions = [f"Label: {prediction['label']}, Confidence: {prediction['score']:.4f}" for prediction in predictions]
-     return "\n".join(formatted_predictions)

- # Define the Gradio interface
  iface = gr.Interface(
-     fn=predict_voice,
-     inputs=gr.Audio(source="upload", type="file", label="Upload Audio File"),
-     outputs=gr.Text(label="Predictions"),
      title="Voice Authenticity Detection",
-     description="This model detects whether a voice is real or AI-generated. Upload an audio file to get started.",
-     allow_flagging="never",
      theme="huggingface"
  )

- # Launch the Gradio app
  iface.launch()
-
 
  import gradio as gr
+ from transformers import AutoFeatureExtractor, AutoModelForAudioClassification
+ import numpy as np

+ # Path to the local directory where the model files are stored within the Space
+ local_model_path = "./"
+
+ # Initialize the feature extractor and model from the local files
+ extractor = AutoFeatureExtractor.from_pretrained(local_model_path)
+ model = AutoModelForAudioClassification.from_pretrained(local_model_path)

  def predict_voice(audio_file):
+     """
+     Predicts whether a voice is real or spoofed from an audio file.
+
+     Args:
+         audio_file: The input audio file to be classified.
+
+     Returns:
+         A string with the prediction and confidence level.
+     """
+     # Convert the input audio file to the model's expected format.
+     inputs = extractor(audio_file, return_tensors="pt")
+
+     # Generate predictions from the model.
+     outputs = model(**inputs)
+
+     # Extract logits and compute the class with the highest score.
+     logits = outputs.logits
+     predicted_index = np.argmax(logits.detach().numpy())
+
+     # Translate the index to its label.
+     label = model.config.id2label[predicted_index]
+
+     # Calculate the confidence of the prediction (NumPy has no softmax built-in,
+     # so normalise the exponentiated logits manually).
+     exp_logits = np.exp(logits.detach().numpy())
+     probabilities = exp_logits / np.sum(exp_logits, axis=1, keepdims=True)
+     confidence = np.max(probabilities) * 100
+
+     # Prepare the output string.
+     result = f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."
+     return result

+ # Setting up the Gradio interface
  iface = gr.Interface(
+     fn=predict_voice,  # Function to call
+     inputs=gr.inputs.Audio(source="upload", type="file", label="Upload Audio File"),  # Audio input
+     outputs="text",  # Text output
      title="Voice Authenticity Detection",
+     description="Detects whether a voice is real or AI-generated. Upload an audio file to see the results.",
      theme="huggingface"
  )

+ # Run the Gradio interface
  iface.launch()
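
Note that the updated predict_voice hands the Gradio upload object straight to the feature extractor, while transformers audio feature extractors generally expect a decoded waveform array plus a sampling_rate. The sketch below shows one way that decoding step could look; it assumes librosa is available in the Space, that the checkpoint expects 16 kHz mono input, and load_and_classify is a hypothetical helper that is not part of this commit.

# Hypothetical helper (not part of this commit): decode the uploaded file to a
# 16 kHz mono waveform before running the classifier. Assumes librosa is
# installed, that the checkpoint was trained on 16 kHz audio, and reuses the
# `extractor` and `model` objects defined in app.py above.
import librosa
import torch

def load_and_classify(audio_path):
    # librosa.load returns a float32 waveform resampled to the requested rate.
    waveform, sample_rate = librosa.load(audio_path, sr=16000, mono=True)

    # Audio feature extractors take the raw samples and the sampling rate.
    inputs = extractor(waveform, sampling_rate=sample_rate, return_tensors="pt")

    with torch.no_grad():
        logits = model(**inputs).logits

    # Softmax over the class dimension gives per-label probabilities.
    probabilities = torch.softmax(logits, dim=-1)[0]
    predicted_index = int(torch.argmax(probabilities))
    label = model.config.id2label[predicted_index]
    confidence = float(probabilities[predicted_index]) * 100
    return f"The voice is classified as '{label}' with a confidence of {confidence:.2f}%."

With a helper like this, predict_voice could simply forward the uploaded file's path once the audio input is configured to hand back a filepath (for example, type="filepath" in recent Gradio releases).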