Spaces:

PsalmsJava
/

voice-emotion-api

Sleeping

PsalmsJava committed 25 days ago

Commit

fe1e779

verified ·

1 Parent(s): f7c744d

Update app/model.py

Files changed (1) hide show

app/model.py CHANGED Viewed

@@ -1,13 +1,15 @@
 import torch
-from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification
 device = "cpu"
 model = None
 feature_extractor = None
-EMOTIONS = ["angry", "happy", "sad", "neutral"]
 def load_models():
     global model, feature_extractor
@@ -16,12 +18,16 @@ def load_models():
             "superb/wav2vec2-base-superb-er"
         )
-        model = Wav2Vec2ForSequenceClassification.from_pretrained(
             "superb/wav2vec2-base-superb-er"
         ).to(device)
 def predict(audio):
     inputs = feature_extractor(
         audio,
         sampling_rate=16000,
@@ -30,15 +36,18 @@ def predict(audio):
     )
     with torch.no_grad():
-        logits = model(**inputs).logits
-    probs = torch.nn.functional.softmax(logits, dim=1).numpy()[0]
-    idx = int(probs.argmax())
     return {
-        "primary_emotion": idx,
-        "emotion_label": EMOTIONS[idx],
-        "confidence": float(probs[idx]),
-        "scores": probs.tolist()
     }

 import torch
+from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Model
+import numpy as np
+from app.features import extract_features
+from app.classifier import simple_rule_classifier
 device = "cpu"
 model = None
 feature_extractor = None
 def load_models():
     global model, feature_extractor
             "superb/wav2vec2-base-superb-er"
         )
+        model = Wav2Vec2Model.from_pretrained(
             "superb/wav2vec2-base-superb-er"
         ).to(device)
 def predict(audio):
+    # ---- Tone features ----
+    tone_features = extract_features(audio)
+    # ---- Deep embeddings ----
     inputs = feature_extractor(
         audio,
         sampling_rate=16000,
     )
     with torch.no_grad():
+        outputs = model(**inputs)
+    embeddings = outputs.last_hidden_state.mean(dim=1).numpy()[0]
+    # ---- Combine ----
+    combined = np.hstack([tone_features, embeddings])
+    # ---- Classify ----
+    emotion, confidence = simple_rule_classifier(tone_features)
     return {
+        "emotion_label": emotion,
+        "confidence": confidence,
+        "note": "Tone-based prediction (less text bias)"
     }