from emo_gen import EmotionModel, process_func

import librosa
import numpy as np
import torch
from transformers import Wav2Vec2Processor


def get_emo(path):
    """Run the emotion model on one audio file and return its output.

    The audio at ``path`` is loaded with librosa and resampled to 16 kHz,
    given a leading axis of length 1, cast to float64 and handed to
    ``process_func`` together with a freshly loaded processor and model.

    Args:
        path: Audio file location, in any form ``librosa.load`` accepts.

    Returns:
        The value of ``process_func(..., embeddings=True)`` after
        ``.squeeze(0)``. Presumably this is the emotion embedding for the
        clip with the batch axis removed -- confirm against
        ``emo_gen.process_func``.
    """
    model_name = "./emotional/wav2vec2-large-robust-12-ft-emotion-msp-dim"
    sampling_rate = 16000

    # Use the GPU when torch reports one, otherwise fall back to the CPU.
    # NOTE(review): the original carried a commented-out alternative,
    # "device = config.bert_gen_config.device"; no `config` object is
    # visible in this file, so the automatic choice is kept.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # NOTE(review): processor and model are re-created on every call. If
    # this function is invoked per utterance, consider loading them once
    # in the caller or caching them.
    processor = Wav2Vec2Processor.from_pretrained(model_name)
    model = EmotionModel.from_pretrained(model_name).to(device)

    # `sr` must be passed by keyword: librosa >= 0.10 rejects positional
    # arguments after the path, while the keyword form works on all versions.
    wav, sr = librosa.load(path, sr=sampling_rate)

    # Add a leading axis of size 1 and cast to Python float (float64), as in
    # the original call; the matching axis is squeezed off the result.
    batch = np.expand_dims(wav, 0).astype(float)
    return process_func(
        batch,
        sr,
        model,
        processor,
        device,
        embeddings=True,
    ).squeeze(0)