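"""Streamlit demo that scores how well a Spanish caption matches an uploaded image using the flax-community/clip-spanish hybrid CLIP model."""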
import os
import sys

import streamlit as st
from huggingface_hub import snapshot_download
from transformers import AutoTokenizer


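# Fetch the model repository snapshot locally; it ships modeling_hybrid_clip.py and
# test_on_image.py, so add the snapshot directory to sys.path to import them below.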
LOCAL_PATH = snapshot_download("flax-community/clip-spanish")
sys.path.append(LOCAL_PATH)

from modeling_hybrid_clip import FlaxHybridCLIP
from test_on_image import run_inference


def save_file_to_disk(uploaded_file):
    # Persist the uploaded image to a temporary file so run_inference can read it from disk
    temp_file = os.path.join("/tmp", uploaded_file.name)
    with open(temp_file, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return temp_file

def load_tokenizer_and_model():
    # Load the Spanish BERT tokenizer and the hybrid CLIP weights from the downloaded snapshot
    tokenizer = AutoTokenizer.from_pretrained("dccuchile/bert-base-spanish-wwm-cased")
    model = FlaxHybridCLIP.from_pretrained(LOCAL_PATH)
    return tokenizer, model

tokenizer, model = load_tokenizer_and_model()

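# Streamlit UI: the user uploads an image and types a caption; the app scores how well they match.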
st.title("Image-Caption Matching")
uploaded_file = st.file_uploader("Choose an image...", type="jpg")
text_input = st.text_input("Type a caption")

if uploaded_file is not None and text_input:
    local_image_path = None
    try:
        # Save the upload to a temporary file, score it against the caption, then display both
        local_image_path = save_file_to_disk(uploaded_file)
        score = run_inference(local_image_path, text_input, model, tokenizer).tolist()
        st.image(uploaded_file, caption=text_input)
        st.write(f"## Score: {score:.2f}")
    finally:
        # Remove the temporary file even if inference fails
        if local_image_path:
            os.remove(local_image_path)