Spaces:
Sleeping
Sleeping
| from transformers import pipeline | |
| import librosa | |
| import numpy as np | |
# Module-wide cache for the audio-classification pipeline; populated by the
# first call to load_model() and reused afterwards.
classifier = None


def load_model():
    """Return the shared audio-classification pipeline, creating it on first use.

    The pipeline is built once from the "Hemgg/Deepfake-audio-detection"
    checkpoint and stored in the module-level ``classifier`` variable; every
    later call returns that same object without rebuilding it.

    Returns:
        The cached pipeline object produced by ``transformers.pipeline``.
    """
    global classifier

    # Fast path: the pipeline has already been constructed.
    if classifier is not None:
        return classifier

    classifier = pipeline(
        "audio-classification",
        model="Hemgg/Deepfake-audio-detection",
        # -1 is the transformers convention for running on CPU.
        device=-1,
    )
    return classifier
# Substrings that mark a model label as synthetic speech.
# NOTE(review): matching is by substring, so the short marker "ai" also matches
# any label that merely contains those two letters -- confirm against the
# model's actual label set.
_AI_LABEL_MARKERS = ("ai", "fake", "synthetic", "aivoice")

# Heuristic thresholds used for the human-readable explanation only.
_FLATNESS_THRESHOLD = 0.5
_PITCH_STD_THRESHOLD_HZ = 10


def _classify_label(label: str) -> str:
    """Map a raw model label to ``"AI_GENERATED"`` or ``"HUMAN"``."""
    lowered = label.lower()
    if any(marker in lowered for marker in _AI_LABEL_MARKERS):
        return "AI_GENERATED"
    return "HUMAN"


def _acoustic_cues(y: np.ndarray, sampling_rate: int) -> tuple[list[str], str]:
    """Describe spectral-flatness and pitch cues and derive the heuristic vote."""
    flatness = librosa.feature.spectral_flatness(y=y).mean()
    # The pitch tracker must be told the real sampling rate; otherwise it
    # falls back to librosa's default and reports f0 on the wrong scale.
    pitch = librosa.yin(y, fmin=75, fmax=300, sr=sampling_rate)
    pitch_std = np.std(pitch) if len(pitch) > 0 else 0.0

    is_flat = flatness > _FLATNESS_THRESHOLD
    is_monotone = pitch_std < _PITCH_STD_THRESHOLD_HZ

    cues = [
        "unnatural high spectral flatness (robotic)"
        if is_flat
        else "natural spectral variation",
        "unnatural pitch consistency"
        if is_monotone
        else "natural pitch variation",
    ]
    # The heuristic only votes "AI" when both cues look synthetic.
    feature_vote = "AI_GENERATED" if (is_flat and is_monotone) else "HUMAN"
    return cues, feature_vote


def _build_explanation(cues: list[str], feature_vote: str, classification: str) -> str:
    """Compose the explanation sentence(s) from the cues and the model verdict."""
    cues_text = " and ".join(cues)
    if feature_vote == classification:
        explanation = (
            f"{cues_text}, which aligns with the model prediction "
            f"of {classification.lower()} voice."
        )
    else:
        explanation = (
            f"{cues_text}. However, the deep learning model detected "
            f"patterns consistent with {classification.lower()} voice."
        )
    # Upper-case only the first character: str.capitalize() would lower-case
    # the rest of the text, including the "However" that opens the second
    # sentence.
    return explanation[:1].upper() + explanation[1:]


def detect_audio(y: np.ndarray, sampling_rate: int = 16000) -> tuple[str, float, str]:
    """Detect whether audio is AI_GENERATED or HUMAN.

    The verdict and confidence come from the first prediction returned by the
    classifier obtained from ``load_model()``. Spectral flatness and pitch
    variability are computed separately and used only to word the explanation;
    they never override the model's verdict.

    Args:
        y: Audio samples handed to the classifier and to librosa.
            NOTE(review): presumably a mono floating-point waveform -- confirm
            against the caller.
        sampling_rate: Sampling rate of ``y`` in Hz. Defaults to 16000, the
            value this function previously hard-coded.

    Returns:
        A ``(classification, confidenceScore, explanation)`` tuple where
        ``classification`` is ``"AI_GENERATED"`` or ``"HUMAN"`` and
        ``confidenceScore`` is the top prediction's score rounded to three
        decimals. If the classifier returns nothing, or any step raises, the
        result is ``"HUMAN"`` with a confidence of 0.50 and an explanation
        describing why.
    """
    try:
        model = load_model()
        result = model({"array": y, "sampling_rate": sampling_rate})
        if not result:
            return "HUMAN", 0.50, "Insufficient audio features detected."

        # Take the top prediction.
        top = result[0]
        classification = _classify_label(top["label"])
        confidence = round(float(top["score"]), 3)

        cues, feature_vote = _acoustic_cues(y, sampling_rate)
        explanation = _build_explanation(cues, feature_vote, classification)
        return classification, confidence, explanation
    except Exception as e:
        # Deliberate best-effort boundary: callers always get a result tuple,
        # with the failure surfaced in the explanation text.
        return (
            "HUMAN",
            0.50,
            f"Analysis error: {str(e)}. Treated as human.",
        )