import gradio as gr
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import torch
import torch.nn.functional as F
# Load the original CLIP model and processor
model_original = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor_original = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
# Load the three UIClip variants of the CLIP model
model_variant_1 = CLIPModel.from_pretrained("biglab/uiclip_jitteredwebsites-2-224-paraphrased")
model_variant_2 = CLIPModel.from_pretrained("biglab/uiclip_jitteredwebsites-2-224-paraphrased_webpairs")
model_variant_3 = CLIPModel.from_pretrained("biglab/uiclip_jitteredwebsites-2-224-paraphrased_webpairs_humanpairs")
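
# Optional (a hedged sketch, not required for this Space): putting the models in
# eval mode and moving them to a GPU can speed up scoring, but the processor
# outputs built in clip_predict_comparison would then also need to be moved to
# the same device, so this is left commented out.
# device = "cuda" if torch.cuda.is_available() else "cpu"
# for m in (model_original, model_variant_1, model_variant_2, model_variant_3):
#     m.eval()
#     m.to(device)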
# Define the function to process inputs and run inference with all four models
def clip_predict_comparison(image1, image2, text):
    # Preprocess the image and text inputs using the original processor for all models
    inputs_1 = processor_original(text=[text], images=image1, return_tensors="pt")
    inputs_2 = processor_original(text=[text], images=image2, return_tensors="pt")

    def compute_similarity_with_softmax(model, inputs_1, inputs_2):
        # Compute the image-text similarity for image 1
        with torch.no_grad():
            outputs_image1 = model(**inputs_1)
            similarity_image1 = outputs_image1.logits_per_image.item()

        # Compute the image-text similarity for image 2
        with torch.no_grad():
            outputs_image2 = model(**inputs_2)
            similarity_image2 = outputs_image2.logits_per_image.item()

        # Apply softmax to normalize the scores between the two images
        similarities = torch.tensor([similarity_image1, similarity_image2])
        normalized_scores = F.softmax(similarities, dim=0)
        result = f"Image 1: {normalized_scores[0].item():.4f}, Image 2: {normalized_scores[1].item():.4f}"
        return normalized_scores[0].item(), normalized_scores[1].item(), result

    # Compute similarities for all four models
    similarity_original_1, similarity_original_2, result_original = compute_similarity_with_softmax(model_original, inputs_1, inputs_2)
    similarity_variant_1_1, similarity_variant_1_2, result_variant_1 = compute_similarity_with_softmax(model_variant_1, inputs_1, inputs_2)
    similarity_variant_2_1, similarity_variant_2_2, result_variant_2 = compute_similarity_with_softmax(model_variant_2, inputs_1, inputs_2)
    similarity_variant_3_1, similarity_variant_3_2, result_variant_3 = compute_similarity_with_softmax(model_variant_3, inputs_1, inputs_2)

    # Return the normalized similarity scores from all four models along with the comparison result
    return (
        f"Original CLIP: {result_original}",
        f"UIClip: {result_variant_1}",
        f"UIClip + Webpairs: {result_variant_2}",
        f"UIClip + Webpairs + Humanpairs: {result_variant_3}"
    )
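
# A minimal sketch of calling the scoring function directly, without the Gradio UI
# (useful for a quick local check). Assumes the test images bundled with this
# repository, listed in `examples` below, are present on disk.
# img_a = Image.open("testcases/original.png")
# img_b = Image.open("testcases/bigtitle.png")
# print(clip_predict_comparison(img_a, img_b, "ui screenshot. well-designed. e-commerce shopping app"))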
# Example inputs (paths to image files and corresponding text descriptions)
examples = [
    ["testcases/original.png", "testcases/bigtitle.png", "ui screenshot. well-designed. e-commerce shopping app"],
    ["testcases/original.png", "testcases/formaterror.png", "ui screenshot. well-designed. e-commerce shopping app"],
    ["testcases/original.png", "testcases/greybackground.png", "ui screenshot. well-designed. e-commerce shopping app"],
    ["testcases/wiki-original.png", "testcases/wiki-color.png", "ui screenshot. well-designed. page displaying information about neon"],
    ["testcases/wiki-original.png", "testcases/wiki-font.png", "ui screenshot. well-designed. page displaying information about neon"],
    ["testcases/wiki-original.png", "testcases/wiki-layout.png", "ui screenshot. well-designed. page displaying information about neon"],
]
# Set up the Gradio interface
demo = gr.Interface(
    fn=clip_predict_comparison,
    inputs=[
        gr.Image(type="pil", label="Upload Image 1", height=400),  # First image input, displayed at 400px height
        gr.Image(type="pil", label="Upload Image 2", height=400),  # Second image input, displayed at 400px height
        gr.Textbox(label="Enter text description")  # Text input
    ],
    outputs=[
        gr.Textbox(label="OpenAI CLIP"),  # Output for the original model
        gr.Textbox(label="UIClip"),  # Output for variant 1
        gr.Textbox(label="UIClip + Webpairs"),  # Output for variant 2
        gr.Textbox(label="UIClip + Webpairs + Humanpairs")  # Output for variant 3
    ],
    title="Score and Compare the Design Quality of Two UI Screenshots",
    description="Upload two UI screenshots and provide a prompt in the format \"ui screenshot. well-designed. DESCRIPTION\". A generic description such as \"mobile app screen\" can also be used. Both screenshots are scored with CLIP and with three variants of UIClip. The numbers in the output pane represent the probability (normalized via softmax) that one image is better designed than the other.",
    examples=examples  # Include the example inputs
)
# Launch the Gradio demo app
demo.launch()
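
# Note: when running outside Hugging Face Spaces, Gradio's standard launch options
# can be used instead, e.g. demo.launch(share=True) for a temporary public link or
# demo.launch(server_name="0.0.0.0") to listen on all interfaces.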