ImageAPI / app.py
nimnim's picture
commented out NA
61b336c
import os

import cv2
import gradio as gr
import torch

import data
from models import imagebind_model
from models.imagebind_model import ModalityType
def read_dict_from_file(filename):
    """Load ``key: value`` pairs from a text file.

    Every non-blank line is split at its first ``:``; whitespace around both
    halves is removed. Blank lines are ignored.

    Args:
        filename: Path of the text file to read.

    Returns:
        A ``(dictionary, text_list)`` tuple. ``dictionary`` maps each stripped
        key to its stripped value (a repeated key keeps its last value).
        ``text_list`` holds the stripped keys in file order, one entry per
        non-blank line, so every entry is a valid key of ``dictionary``.

    Raises:
        ValueError: If a non-blank line contains no ``:`` separator.
    """
    text_list = []
    dictionary = {}
    with open(filename, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            line = line.strip()
            if not line:
                continue
            key, separator, value = line.partition(':')
            if not separator:
                raise ValueError(
                    "%s:%d: expected 'key: value', got %r"
                    % (filename, line_number, line)
                )
            key = key.strip()
            dictionary[key] = value.strip()
            # Append the *stripped* key: callers look text_list entries up in
            # the dictionary, so both must use the same spelling.
            text_list.append(key)
    return dictionary, text_list
# Label source: a text file of "key: value" lines parsed into a lookup table
# (database) and the ordered list of keys (text_list).
text_output_path = 'output.txt'
database, text_list = read_dict_from_file(text_output_path)
# Catch-all entry, currently disabled:
# text_list.append("N/A")
# database["N/A"] = "NA"

# Location the submitted image is written to before it is embedded.
image_paths = ['images/captured_image.jpg']

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Build the pretrained ImageBind model once at import time, switch it to
# evaluation mode and move it to the selected device.
model = imagebind_model.imagebind_huge(pretrained=True)
model.eval()
model.to(device)
def run_model():
    """Match the image(s) in ``image_paths`` against the labels in ``text_list``.

    Both modalities are embedded with the module-level ``model``; the
    image-to-text dot products are softmaxed over the labels and the
    highest-scoring label for the first image is selected.

    Returns:
        A ``(label, database[label])`` tuple for the best-matching label.
    """
    text_batch = data.load_and_transform_text(text_list, device)
    vision_batch = data.load_and_transform_vision_data(image_paths, device)

    # Inference only, so gradient tracking is switched off for the forward pass.
    with torch.no_grad():
        outputs = model({
            ModalityType.TEXT: text_batch,
            ModalityType.VISION: vision_batch,
        })

    similarity = outputs[ModalityType.VISION] @ outputs[ModalityType.TEXT].T
    probabilities = torch.softmax(similarity, dim=-1)
    best_match = torch.argmax(probabilities, dim=1)

    # Only the first image's winning label is reported.
    label_product = text_list[best_match[0]]
    return label_product, database[label_product]
def predict(image):
    """Persist the submitted image and classify it with ``run_model``.

    Args:
        image: Image array from the Gradio input. It is converted with
            ``cv2.COLOR_RGB2BGR`` before saving, so RGB channel order is
            assumed.

    Returns:
        Whatever ``run_model()`` returns for the saved image.

    Raises:
        ValueError: If no image was submitted (``image`` is ``None``).
        OSError: If the image could not be written to ``image_paths[0]``.
    """
    if image is None:
        raise ValueError("No image was provided.")

    target_path = image_paths[0]
    target_dir = os.path.dirname(target_path)
    if target_dir:
        # cv2.imwrite does not create missing parent directories.
        os.makedirs(target_dir, exist_ok=True)

    # Save the image to the desired file path. cv2.imwrite reports failure via
    # its boolean return value, so check it instead of silently continuing
    # with a stale or missing file.
    saved = cv2.imwrite(target_path, cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
    if not saved:
        raise OSError("Could not write image to %r" % target_path)
    return run_model()
# Web UI: one image input, two text outputs (the pair returned by predict).
# NOTE(review): `gr.inputs` appears to be Gradio's legacy input namespace;
# confirm the pinned Gradio version before upgrading.
demo = gr.Interface(
    fn=predict,
    inputs=gr.inputs.Image(),
    outputs=["text", "text"],
)

# Start the Gradio server as soon as the module is executed.
demo.launch()