"""Gradio demo: compare two hand-drawn doodles with an ONNX embedding model.

Each sketchpad drawing is binarized, cropped to its ink bounding box,
max-pool resized to 32x32, embedded by the ONNX model, and the two
embeddings are scored with a dot product.
"""
import gradio as gr
import numpy
import onnxruntime as ort
from PIL import Image

# Load the embedding model once at startup.
ort_sess = ort.InferenceSession('tiny_doodle_embedding.onnx')


def get_bounds(img):
    """Return the inclusive bounding box (top, bottom, left, right) of the
    bright pixels in ``img``.

    Assumes white foreground on a black background: a pixel counts as
    foreground when it is brighter than the midpoint of the image's
    min/max intensities.  A completely uniform (blank) image returns the
    full image extent so callers never receive an empty/inverted box.
    """
    threshold = 0.5 * (float(numpy.min(img)) + float(numpy.max(img)))
    # Vectorized replacement for the original per-pixel Python loop.
    ys, xs = numpy.nonzero(img > threshold)
    if ys.size == 0:
        # Blank canvas: fall back to the whole image instead of an
        # inverted box that would crash downstream resizing.
        return (0, img.shape[0] - 1, 0, img.shape[1] - 1)
    return (int(ys.min()), int(ys.max()), int(xs.min()), int(xs.max()))


def resize_maxpool(img, out_width: int, out_height: int):
    """Downsample ``img`` to (out_height, out_width) by max-pooling.

    Block sizes come from integer division, so trailing rows/columns of
    oversized inputs may be dropped.  Inputs smaller than the output are
    handled by clamping the scale factor to 1 (empty blocks become 0).
    """
    out = numpy.zeros((out_height, out_width), dtype=img.dtype)
    # max(1, ...) guards against a zero scale factor for small inputs,
    # which would make every slice empty (numpy.max on empty raises).
    scale_y = max(1, img.shape[0] // out_height)
    scale_x = max(1, img.shape[1] // out_width)
    for y in range(out_height):
        for x in range(out_width):
            block = img[y * scale_y:(y + 1) * scale_y,
                        x * scale_x:(x + 1) * scale_x]
            out[y, x] = block.max() if block.size else 0
    return out


def process_input(input_msg):
    """Convert a sketchpad payload into a (1, 32, 32) float ink mask.

    The sketchpad delivers 255 for background and 0 for drawn strokes;
    thresholding at the intensity midpoint and inverting yields 1.0 for
    ink on a 0.0 background.
    """
    img = input_msg["composite"]
    # Threshold at the midpoint of the observed range, inverting so
    # strokes become 1.0.
    img_mean = 0.5 * (float(numpy.max(img)) + float(numpy.min(img)))
    img = 1.0 * (img < img_mean)
    top, bottom, left, right = get_bounds(img)
    # Bounds are inclusive, so extend each slice by one: the original
    # code silently dropped the last row and column of ink.
    img = img[top:bottom + 1, left:right + 1]
    img = resize_maxpool(img, 32, 32)
    return numpy.expand_dims(img, axis=0)  # add a batch dimension


def compare(input_img_a, input_img_b):
    """Embed both drawings; return (side-by-side preview, similarity, log)."""
    img_a = process_input(input_img_a)
    img_b = process_input(input_img_b)
    # NOTE: the two inference calls could be stacked and run as one batch.
    a_embedding = ort_sess.run(None, {'input': img_a.astype(numpy.float32)})[0]
    b_embedding = ort_sess.run(None, {'input': img_b.astype(numpy.float32)})[0]
    text_out = f"img_a_embedding: {a_embedding}\n"
    text_out += f"img_b_embedding: {b_embedding}\n"
    # Raw (unnormalized) dot-product similarity; the original deliberately
    # disabled magnitude normalization.
    sim = numpy.dot(a_embedding, b_embedding.T)
    print(sim)
    print(text_out)
    preview = numpy.clip(numpy.hstack([img_a[0], img_b[0]]) * 254, 0, 255)
    return Image.fromarray(preview.astype(numpy.uint8)), sim[0][0], text_out


demo = gr.Interface(
    fn=compare,
    inputs=[
        gr.Sketchpad(image_mode='L', type='numpy'),
        gr.Sketchpad(image_mode='L', type='numpy'),
    ],
    outputs=["image", "number", "text"],
)

if __name__ == "__main__":
    demo.launch(share=True)