|
|
|
|
|
import gradio as gr |
|
import torch |
|
import numpy as np |
|
|
|
import matplotlib |
|
matplotlib.use("Agg") |
|
import matplotlib.pyplot as plt |
|
|
|
from PIL import Image |
|
import collections |
|
import numpy as np |
|
import pandas as pd |
|
import io |
|
import os |
|
from saac.prompt_generation.prompts import generate_prompts,generate_occupations,generate_traits |
|
from saac.prompt_generation.prompt_utils import score_prompt |
|
from saac.image_analysis.process import process_image_pil |
|
from saac.evaluation.eval_utils import generate_countplot, lumia_violinplot, process_analysis, generate_histplot,rgb_intensity,EVAL_DATA_DIRECTORY |
|
from saac.evaluation.evaluate import evaluate_gender_by_adjectives,evaluate_gender_by_occupation,evaluate_skin_by_adjectives,evaluate_skin_by_occupation |
|
from datasets import load_dataset |
|
from diffusers import DiffusionPipeline, PNDMScheduler |
|
|
|
# Prefer the GPU when torch reports one; otherwise run everything on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Display names of the audited models; they double as keys of `results`.
STABLE_MODELS = ["Stable Diffusion v1.5", "Midjourney"]

# Pre-processed evaluation data per model. Each value is whatever
# `process_analysis` returns; the graph functions below unpack it as a pair.
results = {
    STABLE_MODELS[0]: process_analysis(
        os.path.join(EVAL_DATA_DIRECTORY, 'raw', "stable_diffusion_raw_processed.csv")
    ),
    STABLE_MODELS[1]: process_analysis(
        os.path.join(EVAL_DATA_DIRECTORY, 'raw', "midjourney_deepface_calibrated_equalized_mode.csv")
    ),
}

# NOTE(review): prediction_type is overridden to "v_prediction" here -- confirm
# that this matches how the runwayml/stable-diffusion-v1-5 checkpoint was trained.
scheduler = PNDMScheduler.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    subfolder="scheduler",
    prediction_type="v_prediction",
)

# Image-generation pipeline, moved to the selected device.
pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", scheduler=scheduler
).to(device)

# Text-side components of the pipeline, reused by the embedding helpers below.
tokenizer, text_encoder = pipe.tokenizer, pipe.text_encoder
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Demographic terms combined into prompts by computeJob / computeLook.
GENDERS = ["male", "female"]
ETHNICITIES = ["black", "white", "asian"]

# Sorted 'tag' values of the trait and occupation tables; used as dropdown choices.
LOOKS = sorted(generate_traits()['tag'])
JOBS = sorted(generate_occupations()['tag'])

# Leading words shared by every prompt whose text embedding is compared.
RENDERPREFIX = "a high quality photo of a"
|
|
|
def echoToken(token):
    """Return the tokens most similar to *token* as one comma-separated string.

    Uses the module-level ``tokenizer`` and ``text_encoder``; each entry is
    formatted by ``getMostSimilar``.
    """
    return ",".join(getMostSimilar(tokenizer, text_encoder, token))
|
|
|
def getEmbeddingForToken(tokenizer, token, encoder=None):
    """Look up the input embedding of a text that encodes to exactly one token.

    Args:
        tokenizer: Object whose ``encode(token)`` returns a sequence of ids.
        token: Text to look up.
        encoder: Model exposing ``get_input_embeddings()``. Defaults to the
            module-level ``text_encoder`` when omitted.

    Returns:
        A ``(token_id, embedding)`` pair, where ``embedding`` is the matching
        row of the input-embedding weight with a leading dimension of 1 added.

    Raises:
        ValueError: If *token* does not map to exactly one id once the first
            and last encoded ids are dropped.
    """
    # The first and last ids are discarded (presumably the start/end markers
    # the tokenizer adds -- confirm against the tokenizer in use).
    token_ids = tokenizer.encode(token)[1:-1]
    if len(token_ids) != 1:
        # The original used a bare `raise` here, which has no active exception
        # to re-raise and so surfaced as an uninformative RuntimeError.
        raise ValueError(
            f"expected {token!r} to encode to exactly one token, got {len(token_ids)}"
        )

    if encoder is None:
        encoder = text_encoder

    token_id = token_ids[0]
    embedding = encoder.get_input_embeddings().weight.data[token_id].unsqueeze(0)
    return token_id, embedding
|
|
|
def getMostSimilar(tokenizer, text_encoder, token, numResults=50):
    """Rank vocabulary tokens by cosine similarity to *token*'s embedding.

    The comparison runs on the CPU in float32. The ``numResults`` best hits
    are formatted as ``"[<decoded token>,<score>]"`` and the top-ranked hit is
    then dropped (presumably the query token itself -- confirm), so at most
    ``numResults - 1`` strings are returned, best first.
    """
    vocab_embeddings = text_encoder.text_model.embeddings.token_embedding.weight
    _, query_embedding = getEmbeddingForToken(tokenizer, token)

    similarity = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
    scores = similarity(
        vocab_embeddings.to("cpu").to(torch.float32),
        query_embedding.to("cpu").to(torch.float32),
    )

    ranked_scores, ranked_ids = torch.sort(scores, descending=True)
    top_ids = ranked_ids[:numResults].detach().numpy()
    top_scores = ranked_scores[:numResults].detach().numpy()

    formatted = [
        "[" + tokenizer.decode(candidate_id) + "," + str(candidate_score) + "]"
        for candidate_id, candidate_score in zip(top_ids, top_scores)
    ]
    return formatted[1:]
|
|
|
def computeTermSimilarity(tokenizer, text_encoder, termA, termB):
    """Return the cosine similarity between the pooled encodings of two texts.

    Args:
        tokenizer: Callable that tokenises a list of strings into a batch
            supporting ``.to(device)`` and ``**`` unpacking.
        text_encoder: Callable model whose output exposes ``pooler_output``.
        termA: First text.
        termB: Second text.

    Returns:
        The similarity as a Python ``float``.
    """
    # Put the inputs on the encoder's own device. The original pinned them to
    # "cpu", which breaks as soon as the pipeline has been moved to CUDA.
    target_device = getattr(text_encoder, "device", "cpu")
    inputs = tokenizer([termA, termB], padding=True, return_tensors="pt").to(target_device)

    # Inference only: skip building an autograd graph.
    with torch.no_grad():
        outputs = text_encoder(**inputs)

    pooled = outputs.pooler_output
    cos = torch.nn.CosineSimilarity(dim=-1, eps=1e-6)
    return float(cos(pooled[0], pooled[1]).item())
|
|
|
def computeJob(tokenizer, text_encoder, job):
    """Compare demographic-specific prompts for *job* with the neutral prompt.

    The neutral prompt is ``RENDERPREFIX + job``. One specific prompt is built
    for every gender/ethnicity combination and scored against it with
    ``computeTermSimilarity``.

    Returns:
        ``(neutral_prompt, pairs)`` where ``pairs`` is a list of
        ``(prompt, similarity)`` tuples sorted in descending tuple order,
        i.e. primarily by prompt text.
    """
    neutral_prompt = " ".join((RENDERPREFIX, job))

    specific_prompts = [
        " ".join((RENDERPREFIX, ethnicity, gender, job))
        for gender in GENDERS
        for ethnicity in ETHNICITIES
    ]
    similarity_by_prompt = {
        candidate: computeTermSimilarity(tokenizer, text_encoder, candidate, neutral_prompt)
        for candidate in specific_prompts
    }

    ranked = sorted(similarity_by_prompt.items(), reverse=True)
    return neutral_prompt, ranked
|
|
|
def computeLook(tokenizer, text_encoder, look):
    """Compare ethnicity-specific prompts for *look* with per-gender baselines.

    For each gender the baseline prompt is ``RENDERPREFIX + look + gender``;
    every ethnicity variant of it is scored against that baseline with
    ``computeTermSimilarity``.

    Returns:
        ``(title, pairs)`` where ``title`` lists the genders between square
        brackets and ``pairs`` is a list of ``(prompt, similarity)`` tuples
        sorted in descending tuple order, i.e. primarily by prompt text.
    """
    gender_choices = "|".join(GENDERS)
    title = " ".join((RENDERPREFIX, look, "[", gender_choices, "]"))

    similarity_by_prompt = {}
    for gender in GENDERS:
        baseline = " ".join((RENDERPREFIX, look, gender))
        for ethnicity in ETHNICITIES:
            candidate = " ".join((RENDERPREFIX, look, ethnicity, gender))
            similarity_by_prompt[candidate] = computeTermSimilarity(
                tokenizer, text_encoder, candidate, baseline
            )

    ranked = sorted(similarity_by_prompt.items(), reverse=True)
    return title, ranked
|
|
|
|
|
def fig2img(fig):
    """Save *fig* into an in-memory buffer and open that buffer as a PIL Image."""
    buffer = io.BytesIO()
    fig.savefig(buffer)
    # Rewind so the image reader starts at the first byte written.
    buffer.seek(0)
    return Image.open(buffer)
|
|
|
def computePlot(title, results, scaleXAxis=True):
    """Draw a horizontal bar chart of similarity scores and return it as an image.

    Args:
        title: Text quoted in the chart title (``Similarity to "<title>"``).
        results: Sequence of ``(label, value)`` pairs; one bar is drawn per
            pair, first pair at the top.
        scaleXAxis: When true, zoom the x-axis to the value range padded by
            10% of that range on each side; otherwise use the fixed range
            0.0 to 1.0.

    Returns:
        Whatever ``fig2img`` returns for the rendered figure.
    """
    labels = [entry[0] for entry in results]
    values = [entry[1] for entry in results]

    fig, ax = plt.subplots(1, 1, figsize=(10, 5))
    try:
        y_pos = np.arange(len(labels))

        hbars = ax.barh(y_pos, values, left=0, align='center')
        ax.set_yticks(y_pos, labels=labels)
        ax.invert_yaxis()  # first entry at the top
        ax.set_xlabel('Cosine similarity - take care to note compressed X-axis')
        ax.set_title('Similarity to "' + title + '"')
        ax.bar_label(hbars, fmt='%.3f')

        lowest = np.min(values)
        highest = np.max(values)
        spread = highest - lowest

        if scaleXAxis:
            ax.set_xlim(left=lowest - 0.1 * spread, right=highest + 0.1 * spread)
        else:
            ax.set_xlim(left=0.0, right=1.0)

        # Work on this figure explicitly rather than pyplot's implicit
        # "current figure", which another plot may have replaced by now.
        fig.tight_layout()
        return fig2img(fig)
    finally:
        # Always release the figure, even when plotting or rendering fails.
        plt.close(fig)
|
|
|
def computeJobBias(job):
    """Plot the prompt similarities that ``computeJob`` produces for *job*."""
    return computePlot(*computeJob(tokenizer, text_encoder, job))
|
|
|
def computeLookBias(look):
    """Plot the prompt similarities that ``computeLook`` produces for *look*."""
    return computePlot(*computeLook(tokenizer, text_encoder, look))
|
def trait_graph(model,hist=True):
    """Evaluate and plot gender / skin-color results by trait sentiment.

    Args:
        model: Key into the module-level ``results`` mapping.
        hist: When true, draw the gender plot with ``generate_histplot`` over
            ordered sentiment categories; otherwise use ``generate_countplot``.

    Returns:
        ``(pass_skin, pass_gen, skin_image, gender_image)``: the results of the
        two ``evaluate_*_by_adjectives`` calls followed by the violin plot and
        the gender plot, both converted with ``fig2img``.
    """
    tda_res, _occ_res = results[model]
    pass_gen = evaluate_gender_by_adjectives(tda_res)
    pass_skin = evaluate_skin_by_adjectives(tda_res)

    if hist:
        # Work on a copy: `tda_res` is the cached frame stored in `results`,
        # and assigning the categorical column in place would mutate that
        # shared data for every later caller.
        ordered = tda_res.copy()
        ordered['tda_sentiment_val'] = pd.Categorical(
            ordered['tda_sentiment_val'],
            ['very negative', 'negative', 'neutral', 'positive', 'very positive'],
        )
        fig = generate_histplot(ordered, 'tda_sentiment_val', 'gender_detected_val',
                                title='Gender Distribution by Trait Sentiment',
                                xlabel='Trait Sentiment',
                                ylabel='Count')
    else:
        fig = generate_countplot(tda_res, 'tda_sentiment_val', 'gender_detected_val',
                                 title='Gender Count by Trait Sentiment',
                                 xlabel='Trait Sentiment',
                                 ylabel='Count',
                                 legend_title='Gender')

    fig2 = lumia_violinplot(df=tda_res,
                            x_col='tda_compound',
                            rgb_col='skin color',
                            n_bins=21,
                            widths_val=0.05,
                            points_val=100,
                            x_label='TDA Sentiment',
                            y_label='Skin color Intensity',
                            title='Skin Color Intensity, Binned by TDA Sentiment')
    return pass_skin, pass_gen, fig2img(fig2), fig2img(fig)
|
def occ_graph(model):
    """Evaluate and plot gender / skin-color results by occupation salary.

    Looks up ``results[model]``, runs the two ``evaluate_*_by_occupation``
    checks on its second element, and draws a gender histogram and a
    skin-intensity violin plot over the ``a_median`` column.

    Returns:
        ``(pass_skin, pass_gen, skin_image, gender_image)`` with both plots
        converted by ``fig2img``.
    """
    _tda_result, occupation_df = results[model]

    skin_verdict = evaluate_skin_by_occupation(occupation_df)
    gender_verdict = evaluate_gender_by_occupation(occupation_df)

    gender_fig = generate_histplot(
        occupation_df,
        'a_median',
        'gender_detected_val',
        title='Gender Distribution by Median Annual Salary',
        xlabel='Median Annual Salary',
        ylabel='Count',
    )
    skin_fig = lumia_violinplot(
        df=occupation_df,
        x_col='a_median',
        rgb_col='skin color',
        n_bins=21,
        widths_val=7500.0,
        points_val=100,
        x_label='Median Salary',
        y_label='Skin color Intensity',
        title='Skin Color Intensity, Binned by Median Salary',
    )

    skin_image = fig2img(skin_fig)
    gender_image = fig2img(gender_fig)
    return skin_verdict, gender_verdict, skin_image, gender_image
|
|
|
def occ_example(occ):
    """Run ``example_analysis`` on the photorealistic prompt for occupation *occ*."""
    return example_analysis(f"a {occ} photorealistic")
|
def adj_example(adj):
    """Run ``example_analysis`` on the photorealistic prompt for a person described by *adj*."""
    return example_analysis(f"a {adj} person photorealistic")
|
def _clamp_channel(value):
    """Convert *value* to an int and clamp it to the 0-255 colour-channel range."""
    return max(0, min(int(value), 255))


def _rgb_to_hex(rgb):
    """Format the first three items of *rgb* as a ``#rrggbb`` colour string."""
    return "#{0:02x}{1:02x}{2:02x}".format(
        _clamp_channel(rgb[0]), _clamp_channel(rgb[1]), _clamp_channel(rgb[2])
    )


def example_analysis(prompt):
    """Generate one image for *prompt* and summarise the automated analysis.

    Returns:
        A 5-tuple ``(image, gender_text, skin_hex, intensity_hex, sentiment)``:
        the generated image, the detected gender with its percentage, the
        detected skin colour as ``#rrggbb``, the gray ``#rrggbb`` colour built
        from ``rgb_intensity`` of that skin colour, and ``score_prompt(prompt)``.
    """
    pil_img = pipe(prompt).images[0]

    df = process_image_pil(pil_img, prompt)
    rgb_tup = df["skin color"][0]
    rgb_hex = _rgb_to_hex(rgb_tup)

    # Build a valid gray colour. The original concatenated raw `hex()` output,
    # which carries a "0x" prefix and no zero padding (e.g. "#0x7f0x7f0x7f").
    gray_level = _clamp_channel(rgb_intensity(rgb_tup))
    intense_hex = _rgb_to_hex((gray_level, gray_level, gray_level))

    gender_w = float(df["gender.Woman"][0])
    gender_m = float(df["gender.Man"][0])
    gender_str = f"Male ({gender_m}%)" if gender_m>gender_w else f"Female({gender_w}%)"

    return pil_img, gender_str, rgb_hex, intense_hex, score_prompt(prompt)
|
|
|
def bias_assessment(model):
    """Assemble every output of the "Model Audit" tab for *model*.

    Returns:
        A 9-tuple: a sample-size sentence, then for sentiment the skin and
        gender verdicts (each a one-item list of ``(label, "PASS"/"FAIL")``)
        followed by their two graphs, then the same four items for
        income/occupation.
    """
    skin_sent_ok, gender_sent_ok, skin_sent_graph, gender_sent_graph = trait_graph(model)
    skin_occ_ok, gender_occ_ok, skin_occ_graph, gender_occ_graph = occ_graph(model)

    # Row counts of the two frames stored for this model, in stored order.
    first_count = len(results[model][0].index)
    second_count = len(results[model][1].index)

    def verdict(passed):
        return "PASS" if passed else "FAIL"

    def highlighted(subject, passed, axis):
        label = f"{subject} {'unbiased' if passed else 'biased'} by {axis}"
        return [(label, verdict(passed))]

    summary = f"Results are based off of a sample size of {first_count} to {second_count} images after removing genderless and faceless analysis results."

    return (
        summary,
        highlighted("Skin color", skin_sent_ok, "Sentiment"),
        highlighted("Gender", gender_sent_ok, "Sentiment"),
        skin_sent_graph,
        gender_sent_graph,
        highlighted("Skin color", skin_occ_ok, "Income/Occupation"),
        highlighted("Gender", gender_occ_ok, "Income/Occupation"),
        skin_occ_graph,
        gender_occ_graph,
    )
|
# Run both audits once at import time (Midjourney first) so that
# `cached_results` can hand them back without recomputing anything.
mj_analysis, sd_analysis = (
    bias_assessment(model_name)
    for model_name in ("Midjourney", "Stable Diffusion v1.5")
)
|
def cached_results(model):
    """Return the precomputed audit for *model*.

    Only ``"Midjourney"`` selects the Midjourney audit; any other value falls
    through to the Stable Diffusion v1.5 audit.
    """
    return mj_analysis if model == "Midjourney" else sd_analysis
|
|
|
if __name__=='__main__':
    # NOTE(review): the source lost its indentation, so the nesting of the
    # layout containers below is reconstructed from the order of the widgets.
    with gr.Blocks() as demo:
        gr.Markdown("# Facial Adjectival Color and Income Auditor")
        gr.Markdown("## Assessing the bias towards gender and skin color in text-to-image models introduced by sentiment and profession.")

        # Tab 1: serve the precomputed audit for the selected model.
        with gr.Tab("Model Audit"):
            with gr.Row():
                with gr.Column():
                    model = gr.Dropdown(STABLE_MODELS,label="Text-to-Image Model")
                    audit_btn = gr.Button("Assess Model Bias")
                    gr.Markdown("The training set, vocabulary, pre and post processing of generative AI tools don't treat everyone equally. "
                                "Within a 95% margin of statistical error, the following tests expose bias in gender and skin color. To learn more about this process, <a href=\"http://github.com/TRSS-Research/SAAC.git\"/> Visit the repo</a>")
                with gr.Column(variant="compact"):
                    sample = gr.Text(interactive=False,show_label=False)

                    ss_pass = gr.HighlightedText(label="Skin Color Bias by Sentiment").style(
                        color_map={"PASS": "green", "FAIL": "red"})
                    with gr.Accordion("See Graph",open=False):
                        sent_skin = gr.Image()

                    sg_pass = gr.HighlightedText(label="Gender Bias by Sentiment").style(
                        color_map={"PASS": "green", "FAIL": "red"})
                    with gr.Accordion("See Graph",open=False):
                        sent_gen = gr.Image()

                    os_pass = gr.HighlightedText(label="Skin Color Bias by Occupation/Income").style(
                        color_map={"PASS": "green", "FAIL": "red"})
                    with gr.Accordion("See Graph",open=False):
                        occ_skin = gr.Image()

                    og_pass = gr.HighlightedText(label="Gender Bias by Occupation/Income").style(
                        color_map={"PASS": "green", "FAIL": "red"})
                    with gr.Accordion("See Graph",open=False):
                        occ_gen = gr.Image()

            # Output order must match the tuple returned by bias_assessment.
            audit_btn.click(fn=cached_results,inputs=model,
                            outputs=[sample,ss_pass,sg_pass,sent_skin,sent_gen,os_pass,og_pass,occ_skin,occ_gen])

        # Tab 2: generate one image for a prompt and show its analysis.
        with gr.Tab("Image Analysis"):
            gr.Markdown("# Generate an example image and view the automated analysis")
            with gr.Row():
                with gr.Column():
                    inp = gr.Textbox(label="Prompt",placeholder="Try selecting a prompt or enter your own",)
                    gr.Markdown("If the above component is stuck, try switching between the dropdown options.")

                    # Each "Details" panel now sits under the dropdown it
                    # describes; the two texts were swapped in the original.
                    sent = gr.Dropdown(LOOKS,label="Trait")
                    with gr.Accordion("Details",open=False):
                        gr.Markdown("Certain adjectives can reinforce harmful stereotypes associated with gender roles and ethnic backgrounds. "
                                    "Text to image models provide an opportunity to understand how prompting a particular human expression could be triggering, "
                                    "or why an uncommon combination might provide important examples to minorities without default representation."
                                    "To view how positive, neutral, and negative words affect the gender and skin colors in the faces generated, select an adjective.")

                    occs = gr.Dropdown(JOBS,label="Occupation")
                    with gr.Accordion("Details",open=False):
                        gr.Markdown("Referencing a specific profession comes loaded with associations of gender and ethnicity."
                                    " Text to image models provide an opportunity to explicitly specify an underrepresented group, but first we must understand our default behavior. "
                                    "To view how mentioning a particular occupation affects the gender and skin colors in faces of text to image generators, select a job. Promotional materials,"
                                    " advertising, and even criminal sketches which do not explicitly specify a gender or ethnicity term will tend towards the distributions in the Model Audit tab.")

                    analyze_btn = gr.Button("Generate and Analyze")
                with gr.Column():
                    gender = gr.Text(label="Detected Gender",interactive=False)
                    with gr.Row(variant="compact"):
                        skin = gr.ColorPicker(label="Facial skin color")
                        inten = gr.ColorPicker(label="Grayscale intensity")
                    img = gr.Image(label="Stable Diffusion v1.5")
                    sentscore = gr.Text(label="VADER sentiment score",interactive=False)

            # Choosing a trait or occupation fills the prompt box with a template.
            sent.change(fn=lambda k: f"a {k} person photorealistic", inputs=sent, outputs=inp)
            occs.change(fn=lambda k: f"a {k} photorealistic", inputs=occs, outputs=inp,)
            # Output order must match the tuple returned by example_analysis.
            analyze_btn.click(fn=example_analysis,inputs=inp,outputs=[img,gender,skin,inten,sentscore])

    demo.launch()