"""Gradio demo that scores a photo against "hot or not" text prompts using CLIP."""

import clip
import gradio as gr
import torch
from PIL import Image

device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)


def _to_score(positive, negative):
    """Map a (positive, negative) probability pair onto a 0-100 scale.

    Equal probabilities give 50; a fully positive pair gives 100 and a
    fully negative pair gives 0. The result is rounded to two decimals.
    """
    return round((positive - negative + 1) * 50, 2)


def hotornot(image, gender):
    """Score an image against paired positive/negative CLIP text prompts.

    Args:
        image: Array-like image data exposing ``.astype``; it is converted
            to ``uint8`` and interpreted as RGB (presumably the NumPy array
            handed over by the Gradio image component -- confirm if this
            function is called from elsewhere).
        gender: Noun interpolated into every prompt, e.g. ``"person"``,
            ``"man"`` or ``"woman"``.

    Returns:
        A 4-tuple ``(composite, hotness, beauty, attractiveness)`` of
        scores on a 0-100 scale, each rounded to two decimals.

    Raises:
        ValueError: If ``image`` is ``None`` (nothing was submitted).
    """
    if image is None:
        raise ValueError("An input image is required.")

    pil_image = Image.fromarray(image.astype("uint8"), "RGB")
    image_batch = preprocess(pil_image).unsqueeze(0).to(device)

    positive_terms = [
        f'a hot {gender}',
        f'a beautiful {gender}',
        f'an attractive {gender}',
    ]
    negative_terms = [
        f'a gross {gender}',
        f'an ugly {gender}',
        f'a hideous {gender}',
    ]

    # Interleave the prompts as [pos0, neg0, pos1, neg1, ...] so that a single
    # forward pass covers every pair instead of re-encoding the image per pair.
    prompts = [
        term
        for pair in zip(positive_terms, negative_terms)
        for term in pair
    ]
    text = clip.tokenize(prompts).to(device)

    with torch.no_grad():
        logits_per_image, _ = model(image_batch, text)

    # Regroup the logits into one row per (positive, negative) pair and apply
    # the softmax within each pair only, so each row sums to 1 on its own.
    pair_probs = logits_per_image.reshape(-1, 2).softmax(dim=-1).cpu().tolist()

    hotness_score, beauty_score, attractiveness_score = (
        _to_score(positive, negative) for positive, negative in pair_probs
    )

    # The composite compares the mean positive probability with the mean
    # negative probability across all pairs.
    positive_probs = [positive for positive, _ in pair_probs]
    negative_probs = [negative for _, negative in pair_probs]
    composite = _to_score(
        sum(positive_probs) / len(positive_probs),
        sum(negative_probs) / len(negative_probs),
    )

    return composite, hotness_score, beauty_score, attractiveness_score


iface = gr.Interface(
    fn=hotornot,
    inputs=[
        gr.Image(label="Image"),
        gr.Dropdown(
            [
                'person',
                'man',
                'woman',
            ],
            value='person',
        ),
    ],
    outputs=[
        # \u2122 is the trade mark sign; escaped so the label survives re-encoding.
        gr.Textbox(label="Total Hot or Not\u2122 Score"),
        gr.Textbox(label="Hotness Score"),
        gr.Textbox(label="Beauty Score"),
        gr.Textbox(label="Attractiveness Score"),
    ],
    title="Hot or Not",
    description=(
        "A simple hot or not app using OpenAI's CLIP model. How it works: "
        "the input image is passed to OpenAI's CLIP image captioning model "
        "and evaluated for how much it conforms to the model's idea of "
        "hotness, beauty, and attractiveness. "
        "These values are then combined to produce a composite score on a "
        "scale of 0 to 100."
    ),
)

iface.launch()