# clip-rsicd-demo / utils.py
# Sujit Pal
# fix: fixing result output and bypassing large files problem
# 17476c1
import json
import matplotlib.pyplot as plt
import nmslib
import numpy as np
import os
import streamlit as st
from PIL import Image
from transformers import CLIPProcessor, FlaxCLIPModel
@st.cache(allow_output_mutation=True)
def load_index(image_vector_file):
    """Build an nmslib HNSW index from a file of image vectors.

    Each non-blank line of the file is expected to hold a filename and a
    comma-separated list of floats, separated by a tab character.

    Args:
        image_vector_file: path to the tab-separated vector file.

    Returns:
        A ``(filenames, index)`` tuple. ``filenames`` lists the first
        column of every parsed line in file order; ``index`` is the nmslib
        index (``hnsw`` method, ``cosinesimil`` space) built over the
        vectors, which are added in that same order.
    """
    filenames, image_vecs = [], []
    # The context manager closes the handle even when a line fails to parse.
    with open(image_vector_file, "r", encoding="utf-8") as fvec:
        for line in fvec:
            line = line.strip()
            if not line:
                # Blank lines carry no record; skipping them avoids an
                # IndexError on the missing vector column.
                continue
            cols = line.split('\t')
            filenames.append(cols[0])
            image_vecs.append(np.array([float(x) for x in cols[1].split(',')]))
    V = np.array(image_vecs)
    index = nmslib.init(method='hnsw', space='cosinesimil')
    index.addDataPointBatch(V)
    index.createIndex({'post': 2}, print_progress=True)
    return filenames, index
@st.cache(allow_output_mutation=True)
def load_model(model_path, baseline_model):
    """Load the Flax CLIP model and its processor from ``model_path``.

    Args:
        model_path: value handed to ``from_pretrained`` for both the model
            and the processor.
        baseline_model: not used at present; kept in the signature so
            existing callers keep working.

    Returns:
        A ``(model, processor)`` tuple.
    """
    clip_model = FlaxCLIPModel.from_pretrained(model_path)
    # The processor is loaded from model_path as well, not from baseline_model.
    clip_processor = CLIPProcessor.from_pretrained(model_path)
    return clip_model, clip_processor
@st.cache(allow_output_mutation=True)
def load_captions(caption_file):
    """Read a JSON-lines caption file into a filename -> captions mapping.

    Every non-blank line must be a JSON object with a ``"filename"`` key
    and a ``"captions"`` key. If a filename occurs more than once, the
    last occurrence wins.

    Args:
        caption_file: path to the JSON-lines file, decoded as UTF-8.

    Returns:
        A dict mapping each ``"filename"`` value to its ``"captions"`` value.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if a record lacks ``"filename"`` or ``"captions"``.
    """
    image2caption = {}
    with open(caption_file, "r", encoding="utf-8") as fcap:
        for line in fcap:
            line = line.strip()
            if not line:
                # json.loads("") raises, so blank lines are skipped
                # instead of aborting the whole load.
                continue
            data = json.loads(line)
            image2caption[data["filename"]] = data["captions"]
    return image2caption