jvcanavarro's picture
Add source code
0dfe33d
raw
history blame contribute delete
964 Bytes
from . import audiovisual_stream
import chainer.serializers
import librosa
import numpy
import skvideo.io
import numpy as np
# Number of decoded video frames that predict_traits groups into one model call.
FRAMES_LIMIT = 25
def load_audio(data):
    """Load the audio track of ``data`` resampled to 16 kHz.

    Args:
        data: Path (or other source accepted by ``librosa.load``) of the
            clip to read.

    Returns:
        The decoded samples with three leading singleton axes prepended,
        i.e. shape ``(1, 1, 1, n_samples)`` for the mono signal that
        ``librosa.load`` returns by default.
    """
    # The sample rate is passed by keyword: librosa >= 0.10 made every
    # argument after the path keyword-only, so the positional form
    # ``librosa.load(data, 16000)`` raises TypeError there. The keyword
    # form works on older releases as well.
    samples, _sample_rate = librosa.load(data, sr=16000)
    return samples[None, None, None, :]
def load_model(model_path="src/model"):
    """Build the audiovisual ResNet18 on the CPU and load its weights.

    Args:
        model_path: Location of the NPZ snapshot to load. Defaults to
            ``"src/model"``, which is resolved relative to the current
            working directory, so callers running from elsewhere should
            pass an explicit path.

    Returns:
        The ``audiovisual_stream.ResNet18`` instance with the parameters
        from ``model_path`` loaded into it.
    """
    model = audiovisual_stream.ResNet18().to_cpu()
    chainer.serializers.load_npz(model_path, model)
    return model
def predict_traits(data, model):
    """Run ``model`` over the opening frames of a clip and average the results.

    Frames are decoded lazily from ``data`` and collected into groups of
    ``FRAMES_LIMIT``. Each full group is passed to ``model`` together with
    the audio of the whole clip, as the two-element list
    ``[audio_features, frames]``.

    Args:
        data: Path of the clip; it is read once for video and once for audio.
        model: Callable that accepts ``[audio_features, frames]``.

    Returns:
        The mean over axis 0 of the collected model outputs. If no group
        ever fills up (fewer than ``FRAMES_LIMIT`` frames were decoded)
        the output list is empty and ``np.mean`` yields NaN.
    """
    # Decoding is capped at 27 frames. With FRAMES_LIMIT at 25 this gives
    # at most one full group; any trailing frames are dropped.
    frame_reader = skvideo.io.vreader(data, num_frames=27)
    audio_features = load_audio(data)

    batch = []
    outputs = []
    for frame in frame_reader:
        # Move axis 2 to the front (channels-first, assuming the reader
        # yields height x width x channels frames -- TODO confirm).
        batch.append(np.rollaxis(frame, 2))
        if len(batch) != FRAMES_LIMIT:
            continue
        # The group is full: score it and start collecting the next one.
        outputs.append(model([audio_features, np.array(batch, "float32")]))
        batch = []

    return np.mean(np.asarray(outputs), axis=0)