File size: 1,055 Bytes
f2c3ec9
3d3ef8a
f2c3ec9
3d3ef8a
 
 
 
 
 
f2c3ec9
3d3ef8a
f2c3ec9
3d3ef8a
 
 
 
 
 
 
f2c3ec9
3d3ef8a
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import tempfile
from pathlib import Path

import streamlit as st
from hf_inference import infer_multimodal_model

# Checkpoint files for each single-modality model and for the fused model.
# The whole mapping is handed to the inference call as one argument.
paths = dict(
    text_model_path='files/bert-large-uncased_none_seed-42.pt',
    video_model_path='files/XCLIP_Augmented.pt',
    audio_model_path='files/1d_cnn_with_opensmile.pt',
    multimodal_model_path='files/multimodal_model_with_early_fusion.pt',
)

# Emoji shown next to each predicted emotion label.
# Written as ASCII-only code-point escapes so the table cannot be corrupted by
# a mis-decoded save or copy of this file (several entries had previously
# degraded into mojibake).
label2emoji = {
    'anger': '\U0001F620',     # angry face
    'disgust': '\U0001F922',   # nauseated face
    'fear': '\U0001F628',      # fearful face
    'joy': '\U0001F604',       # smiling face with open mouth and smiling eyes
    'neutral': '\U0001F636',   # face without mouth
    'sadness': '\U0001F614',   # pensive face
    'surprise': '\U0001F62F',  # hushed face
}

# --- Page body: collect a video and a text, run inference, show the result ---
uploaded_video = st.file_uploader('Upload your video')
text = st.text_input('Enter your text')

# Both inputs are required; until they are provided only the widgets render.
if uploaded_video is not None and text:
    bytes_data = uploaded_video.getvalue()

    # Echo the inputs back so the user can see what is being analysed.
    st.divider()
    st.subheader('Input Video')
    st.video(bytes_data)
    st.subheader('Input Text')
    st.write(text)

    # The inference call takes a file path, so the upload is written to disk.
    # A per-run temporary directory replaces the former fixed file in the
    # working directory: concurrent sessions can no longer overwrite each
    # other's video, and the copy is removed once inference has finished.
    with tempfile.TemporaryDirectory() as tmp_dir:
        video_path = Path(tmp_dir) / 'input_video.mp4'
        video_path.write_bytes(bytes_data)
        label = infer_multimodal_model(
            text=text,
            video_path=str(video_path),  # passed as str, as before
            model_pathes=paths,  # keyword name kept exactly as the call used it
        )

    # A label without a table entry is still shown, just without emoji,
    # rather than raising KeyError after inference has already completed.
    emoji = label2emoji.get(label, '')
    st.subheader('Video Emotion')
    st.write(f'{label} {emoji * 3}')