from huggingface_hub import from_pretrained_keras
import tensorflow as tf
import gradio as gr

# Download the model in the global context so it is loaded once at startup.
vis_model = from_pretrained_keras("ariG23498/involution")


def infer(test_image):
    # Convert the image to a TensorFlow tensor and resize it to the
    # constant 32x32 resolution the model expects.
    image = tf.constant(test_image)
    image = tf.image.resize(image, (32, 32))

    # Run the model and get the (output, kernel) pair from each
    # involution layer; only the kernels are visualized here.
    (inv1_out, inv2_out, inv3_out) = vis_model.predict(image[None, ...])
    _, inv1_kernel = inv1_out
    _, inv2_kernel = inv2_out
    _, inv3_kernel = inv3_out

    # Sum over the kernel-size and group axes to collapse each kernel
    # into a single-channel spatial activation map.
    inv1_kernel = tf.reduce_sum(inv1_kernel, axis=[-1, -2, -3])
    inv2_kernel = tf.reduce_sum(inv2_kernel, axis=[-1, -2, -3])
    inv3_kernel = tf.reduce_sum(inv3_kernel, axis=[-1, -2, -3])

    # Drop the batch dimension and add a trailing channel axis so the
    # maps render as grayscale images in Gradio.
    return (
        inv1_kernel[0, ..., None],
        inv2_kernel[0, ..., None],
        inv3_kernel[0, ..., None],
    )


iface = gr.Interface(
    fn=infer,
    title="Involutional Neural Networks",
    description="""Authors: [Aritra Roy Gosthipaty](https://twitter.com/ariG23498) and [Ritwik Raha](https://twitter.com/ritwik_raha)

    Paper: [Involution: Inverting the Inherence of Convolution for Visual Recognition](https://arxiv.org/abs/2103.06255)
    """,
    inputs=gr.inputs.Image(label="Input Image"),
    outputs=[
        gr.outputs.Image(label="Activation from Kernel 1"),
        gr.outputs.Image(label="Activation from Kernel 2"),
        gr.outputs.Image(label="Activation from Kernel 3"),
    ],
    examples=[["examples/llama.jpeg"], ["examples/dalai-lamao.jpeg"]],
).launch()
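
# --- Optional local sanity check ---
# A minimal sketch, not part of the original Space: calling `infer` directly,
# e.g. from a Python shell, inspects the three activation maps without going
# through the UI. The PIL/NumPy calls are standard; the example image path is
# assumed to exist on disk, as bundled with the Space.
#
#   import numpy as np
#   from PIL import Image
#
#   sample = np.asarray(Image.open("examples/llama.jpeg"))
#   act1, act2, act3 = infer(sample)
#   print(act1.shape, act2.shape, act3.shape)  # one (H, W, 1) map per involution layer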