"""Helpers for running the audiovisual ResNet-18 model over a video file."""

import chainer.serializers
import librosa
import numpy
import numpy as np
import skvideo.io

from . import audiovisual_stream

# Number of consecutive video frames fed to the model in one forward pass.
FRAMES_LIMIT = 25


def load_audio(data):
    """Load the audio track of *data* resampled to 16 kHz.

    Args:
        data: Path (or other source accepted by ``librosa.load``) of the
            media file.

    Returns:
        The decoded waveform with three leading singleton axes prepended.
        With librosa's default mono decoding this is a float array of
        shape ``(1, 1, 1, n_samples)``.
    """
    # ``sr`` is passed by keyword: recent librosa releases no longer accept
    # it positionally, while older releases accept the keyword form as well.
    waveform, _sample_rate = librosa.load(data, sr=16000)
    return waveform[None, None, None, :]


def load_model():
    """Build the ``ResNet18`` network on the CPU and load its weights.

    The weights are read from the NPZ file ``"src/model"``; the path is
    resolved relative to the current working directory.

    Returns:
        The ``audiovisual_stream.ResNet18`` instance with weights loaded.
    """
    model = audiovisual_stream.ResNet18().to_cpu()
    chainer.serializers.load_npz("src/model", model)
    return model


def predict_traits(data, model):
    """Run *model* over the video at *data* and average its predictions.

    Frames are grouped into consecutive batches of ``FRAMES_LIMIT``; each
    full batch is passed to the model together with the audio of the whole
    file, and the per-batch outputs are averaged along axis 0.

    Args:
        data: Path of the video file; read by both ``skvideo`` (frames)
            and ``librosa`` (audio).
        model: Callable taking ``[audio, frames]`` and returning one
            prediction per call (presumably the object returned by
            ``load_model`` -- confirm against callers).

    Returns:
        ``np.mean`` over the collected per-batch predictions (axis 0).

    Raises:
        ValueError: If the video yields fewer than ``FRAMES_LIMIT`` frames,
            so that no batch could be evaluated.
    """
    # NOTE: only the first 27 frames are requested from the reader. With
    # FRAMES_LIMIT == 25 that yields a single full batch; the trailing
    # frames never fill a second batch and are dropped. Remove the
    # ``num_frames`` cap to evaluate the whole video.
    video_frames = skvideo.io.vreader(data, num_frames=27)
    audio_features = load_audio(data)

    predictions = []
    frames = []
    for frame in video_frames:
        # Move the last axis (axis 2) to the front; presumably
        # height x width x channels -> channels x height x width.
        frames.append(np.rollaxis(frame, 2))
        if len(frames) == FRAMES_LIMIT:
            model_input = [audio_features, np.array(frames, "float32")]
            predictions.append(model(model_input))
            frames = []

    if not predictions:
        # Without this guard np.mean of an empty list silently returns NaN
        # (with only a RuntimeWarning), which is easy to miss downstream.
        raise ValueError(
            "video yielded fewer than %d frames; no prediction could be made"
            % FRAMES_LIMIT
        )

    return np.mean(np.asarray(predictions), axis=0)