# hercorners/voice_handler.py
import sounddevice as sd
import numpy as np
import librosa
import torch
from transformers import pipeline

class VoiceHandler:
    """Records microphone audio and classifies the speaker's emotion."""

    def __init__(self):
        # Wav2Vec2 models expect 16 kHz mono audio.
        self.sample_rate = 16000
        # Pretrained speech-emotion-recognition pipeline (weights are downloaded on first use).
        self.emotion_classifier = pipeline(
            "audio-classification",
            model="ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition",
        )

    def record_audio(self, duration=5):
        """Record `duration` seconds of mono audio from the default input device."""
        recording = sd.rec(
            int(duration * self.sample_rate),
            samplerate=self.sample_rate,
            channels=1,
            dtype="float32",
        )
        sd.wait()  # Block until the recording finishes
        return recording

    def process_audio(self, audio_data):
        """Normalize recorded audio and detect the dominant emotion."""
        # Convert to mono if needed (sd.rec returns a (n_samples, 1) array)
        if audio_data.ndim > 1:
            audio_data = np.mean(audio_data, axis=1)
        # Peak-normalize to [-1, 1]
        audio_data = librosa.util.normalize(audio_data)
        # Classify emotion; pass the sampling rate explicitly so the pipeline
        # does not assume one
        emotion = self.emotion_classifier(
            {"raw": audio_data.astype(np.float32), "sampling_rate": self.sample_rate}
        )
        # Results are sorted by confidence; keep the top label
        return audio_data, emotion[0]["label"]

    def enhance_audio(self, audio_data):
        """Apply simple enhancement: pre-emphasis followed by normalization."""
        # Pre-emphasis filter boosts high frequencies to improve clarity
        y = librosa.effects.preemphasis(audio_data)
        # Peak-normalize to [-1, 1]
        y = librosa.util.normalize(y)
        return y
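

# Minimal usage sketch, not part of the original file: assumes a machine with a
# working microphone and network access to fetch the model weights on first run.
if __name__ == "__main__":
    handler = VoiceHandler()
    audio = handler.record_audio(duration=3)
    _, emotion = handler.process_audio(audio)
    print(f"Detected emotion: {emotion}")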