import gradio as gr
import torch
# from diffusers import AutoPipelineForText2Image
from diffusers import DiffusionPipeline
from transformers import BlipProcessor, BlipForConditionalGeneration
from pathlib import Path
import stone
import requests
import io
import os
from PIL import Image
import spaces
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import hex2color
from huggingface_hub import list_models

# Fetch text-to-image models from the Hugging Face Hub
models = list_models(task="text-to-image")

# Step 1: Keep only diffusers-compatible models
filtered_models = [model for model in models if model.library_name == "diffusers"]

# Step 2: Sort the filtered models by downloads in descending order
sorted_models = sorted(filtered_models, key=lambda x: x.downloads, reverse=True)

# Step 3: Select the top 5 models, with at most one model per organization
top_models = []
companies_seen = set()
for model in sorted_models:
    company_name = model.id.split('/')[0]  # The organization name is the first part of the model id
    if company_name not in companies_seen:
        top_models.append(model)
        companies_seen.add(company_name)
    if len(top_models) == 5:
        break

# Get the ids of the top models
model_names = [model.id for model in top_models]
print(model_names)

# Initial pipeline setup
default_model = model_names[0]
print(default_model)
pipeline_text2image = DiffusionPipeline.from_pretrained(default_model)
pipeline_text2image = pipeline_text2image.to("cuda")


@spaces.GPU
def getimgen(prompt):
    return pipeline_text2image(
        prompt=prompt,
        guidance_scale=0.0,
        num_inference_steps=2
    ).images[0]


blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
blip_model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-large",
    torch_dtype=torch.float16
).to("cuda")


@spaces.GPU
def blip_caption_image(image, prefix):
    inputs = blip_processor(image, prefix, return_tensors="pt").to("cuda", torch.float16)
    out = blip_model.generate(**inputs)
    return blip_processor.decode(out[0], skip_special_tokens=True)


def genderfromcaption(caption):
    cc = caption.split()
    if "man" in cc or "boy" in cc:
        return "Man"
    elif "woman" in cc or "girl" in cc:
        return "Woman"
    return "Unsure"


def genderplot(genlist):
    order = ["Man", "Woman", "Unsure"]
    # Sort the list based on the order of keys
    words = sorted(genlist, key=lambda x: order.index(x))
    # Define colors for each category
    colors = {"Man": "lightgreen", "Woman": "darkgreen", "Unsure": "lightgrey"}
    # Map each word to its corresponding color
    word_colors = [colors[word] for word in words]
    # Plot the colors in a grid with reduced spacing
    fig, axes = plt.subplots(2, 5, figsize=(5, 5))
    plt.subplots_adjust(hspace=0.1, wspace=0.1)
    for i, ax in enumerate(axes.flat):
        ax.set_axis_off()
        ax.add_patch(plt.Rectangle((0, 0), 1, 1, color=word_colors[i]))
    return fig


def skintoneplot(hex_codes):
    # Drop failed detections so hex2color does not choke on None
    hex_codes = [code for code in hex_codes if code is not None]
    # Convert hex codes to RGB values
    rgb_values = [hex2color(hex_code) for hex_code in hex_codes]
    # Calculate luminance for each color
    luminance_values = [0.299 * r + 0.587 * g + 0.114 * b for r, g, b in rgb_values]
    # Sort hex codes by luminance in descending order (light to dark)
    sorted_hex_codes = [code for _, code in sorted(zip(luminance_values, hex_codes), reverse=True)]
    # Plot the colors in a grid with reduced spacing
    fig, axes = plt.subplots(2, 5, figsize=(5, 5))
    plt.subplots_adjust(hspace=0.1, wspace=0.1)
    for i, ax in enumerate(axes.flat):
        ax.set_axis_off()
        # Leave cells blank for images where no face/skin tone was detected
        if i < len(sorted_hex_codes):
            ax.add_patch(plt.Rectangle((0, 0), 1, 1, color=sorted_hex_codes[i]))
    return fig


@spaces.GPU
def generate_images_plots(prompt, model_name):
    print(model_name)
    # Update the pipeline to use the selected model
    global pipeline_text2image
    pipeline_text2image = DiffusionPipeline.from_pretrained(model_name)
    pipeline_text2image = pipeline_text2image.to("cuda")
    foldername = "temp"
    # Generate 10 images
    images = [getimgen(prompt) for _ in range(10)]
    Path(foldername).mkdir(parents=True, exist_ok=True)
    genders = []
    skintones = []
    for i, image in enumerate(images):
        prompt_prefix = "photo of a "
        caption = blip_caption_image(image, prefix=prompt_prefix)
        image.save(f"{foldername}/image_{i}.png")
        try:
            skintoneres = stone.process(f"{foldername}/image_{i}.png", return_report_image=False)
            tone = skintoneres['faces'][0]['dominant_colors'][0]['color']
            skintones.append(tone)
        except Exception:
            skintones.append(None)
        genders.append(genderfromcaption(caption))
    print(genders, skintones)
    return images, skintoneplot(skintones), genderplot(genders)


with gr.Blocks(title="Skin Tone and Gender bias in Text to Image Models") as demo:
    gr.Markdown("# Skin Tone and Gender bias in Text to Image Models")
    model_dropdown = gr.Dropdown(label="Choose a model", choices=model_names, value=default_model)
    prompt = gr.Textbox(label="Enter the Prompt")
    gallery = gr.Gallery(
        label="Generated images",
        show_label=False,
        elem_id="gallery",
        columns=[5],
        rows=[2],
        object_fit="contain",
        height="auto"
    )
    btn = gr.Button("Generate images", scale=0)
    with gr.Row(equal_height=True):
        skinplot = gr.Plot(label="Skin Tone")
        genplot = gr.Plot(label="Gender")
    btn.click(generate_images_plots, inputs=[prompt, model_dropdown], outputs=[gallery, skinplot, genplot])

demo.launch(debug=True)