#initial fork from https://huggingface.co/spaces/Manjushri/SD-2.1-CPU/raw/main/app.py, https://huggingface.co/spaces/bhautikj/sd_clip_bias
import gradio as gr
import torch
import numpy as np
import matplotlib
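# use a non-interactive backend so figures render on headless servers (e.g. Spaces)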
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd
import io
import os
from saac.prompt_generation.prompts import generate_occupations, generate_traits
from saac.prompt_generation.prompt_utils import score_prompt
from saac.image_analysis.process import process_image_pil
from saac.evaluation.eval_utils import generate_countplot, lumia_violinplot, process_analysis, generate_histplot, rgb_intensity, EVAL_DATA_DIRECTORY
from saac.evaluation.evaluate import evaluate_gender_by_adjectives, evaluate_gender_by_occupation, evaluate_skin_by_adjectives, evaluate_skin_by_occupation
from diffusers import DiffusionPipeline, PNDMScheduler
device = "cuda" if torch.cuda.is_available() else "cpu"
STABLE_MODELS = ["Stable Diffusion v1.5", "Midjourney"]
results = dict()
results[STABLE_MODELS[0]] = process_analysis(os.path.join(EVAL_DATA_DIRECTORY,'raw',"stable_diffusion_raw_processed.csv"))
results[STABLE_MODELS[1]] = process_analysis(os.path.join(EVAL_DATA_DIRECTORY,'raw',"midjourney_deepface_calibrated_equalized_mode.csv"))
# TODO Make model selectable input
scheduler = PNDMScheduler.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="scheduler")  # SD v1.5 is an epsilon-prediction model; the v_prediction override was a leftover from the SD 2.1 fork and degrades output
pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", scheduler=scheduler)
pipe = pipe.to(device)
tokenizer = pipe.tokenizer
text_encoder = pipe.text_encoder
#
# AUTHORS NOTE:
# Being absolutely explicit: the genders and ethnicities listed here
# are reductive. This is done not to erase but make analysis here
# simpler, tractable and easier to understand
#
GENDERS = ["male", "female"]
ETHNICITIES = ["black", "white", "asian"]
LOOKS = sorted(list(generate_traits()['tag']))  # e.g. ["beautiful", "stunning", "handsome", "ugly", "plain", "repulsive", "arrogant", "trustworthy"]
JOBS = sorted(list(generate_occupations()['tag']))  # e.g. ["CEO", "doctor", "nurse", "cashier", "janitor", "engineer", "pilot", "dentist", "leader"]
RENDERPREFIX = "a high quality photo of a"
def echoToken(token):
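    """Return the nearest-neighbor tokens to ``token`` as a comma-joined string."""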
res = getMostSimilar(tokenizer, text_encoder, token)
return ",".join(res)
def getEmbeddingForToken(tokenizer, token):
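    """Return (token_id, embedding) for a string that encodes to exactly one token.

    Raises ValueError when the string tokenizes to multiple ids.
    """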
token_ids = tokenizer.encode(token)[1:-1]
    if len(token_ids) != 1:
        raise ValueError(f"Expected a single token id for {token!r}, got {len(token_ids)}")
token_id = token_ids[0]
return token_id, text_encoder.get_input_embeddings().weight.data[token_id].unsqueeze(0)
def getMostSimilar(tokenizer, text_encoder, token, numResults=50):
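    """Rank the vocabulary by cosine similarity to ``token`` in input-embedding space; return the top matches as "[token,score]" strings."""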
internal_embs = text_encoder.text_model.embeddings.token_embedding.weight
tID, tok = getEmbeddingForToken(tokenizer, token)
cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
scores = cos(internal_embs.to("cpu").to(torch.float32), tok.to("cpu").to(torch.float32))
sorted_scores, sorted_ids = torch.sort(scores, descending=True)
best_ids = sorted_ids[0:numResults].detach().numpy()
best_scores = sorted_scores[0:numResults].detach().numpy()
res = []
for best_id, best_score in zip(best_ids, best_scores):
        res.append(f"[{tokenizer.decode(best_id)},{best_score}]")
    return res[1:]  # skip the first result, which is the query token itself
def computeTermSimilarity(tokenizer, text_encoder, termA, termB):
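    """Cosine similarity between the pooled CLIP text-encoder outputs of two prompts."""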
    inputs = tokenizer([termA, termB], padding=True, return_tensors="pt").to(device)  # keep inputs on the same device as the encoder
outputs = text_encoder(**inputs)
cos = torch.nn.CosineSimilarity(dim=-1, eps=1e-6)
val = cos(outputs.pooler_output[0], outputs.pooler_output[1]).item()
return float(val)
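# Illustrative usage (the similarity value is hypothetical; it depends on the checkpoint):
#   computeTermSimilarity(tokenizer, text_encoder,
#                         "a high quality photo of a doctor",
#                         "a high quality photo of a female doctor")  # -> e.g. ~0.9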
def computeJob(tokenizer, text_encoder, job):
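    """Compare each (ethnicity, gender) job prompt against the neutral job prompt; return the title text and (prompt, similarity) pairs."""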
res = {}
neutralPrompt = " ".join([RENDERPREFIX, job])
titleText = neutralPrompt
for gender in GENDERS:
for ethnicity in ETHNICITIES:
prompt = " ".join([RENDERPREFIX, ethnicity, gender, job])
val = computeTermSimilarity(tokenizer, text_encoder, prompt, neutralPrompt)
res[prompt] = val
    return titleText, sorted(res.items(), key=lambda kv: kv[1], reverse=True)  # most similar first
def computeLook(tokenizer, text_encoder, look):
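    """Compare each ethnicity-qualified trait prompt against its gender-only baseline; return the title text and (prompt, similarity) pairs."""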
res = {}
titleText = " ".join([RENDERPREFIX,
look,
"[",
"|".join(GENDERS),
"]"])
for gender in GENDERS:
neutralPromptGender = " ".join([RENDERPREFIX, look, gender])
for ethnicity in ETHNICITIES:
prompt = " ".join([RENDERPREFIX, look, ethnicity, gender])
val = computeTermSimilarity(tokenizer, text_encoder, prompt, neutralPromptGender)
res[prompt] = val
    return titleText, sorted(res.items(), key=lambda kv: kv[1], reverse=True)  # most similar first
# via https://stackoverflow.com/questions/57316491/how-to-convert-matplotlib-figure-to-pil-image-object-without-saving-image
def fig2img(fig):
"""Convert a Matplotlib figure to a PIL Image and return it"""
buf = io.BytesIO()
fig.savefig(buf)
buf.seek(0)
img = Image.open(buf)
return img
def computePlot(title, results, scaleXAxis=True):
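    """Draw (label, similarity) pairs as a horizontal bar chart and return it as a PIL image."""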
    x = [label for label, _ in results]
    y = [value for _, value in results]
fig, ax = plt.subplots(1, 1, figsize=(10, 5))
y_pos = np.arange(len(x))
hbars = ax.barh(y_pos, y, left=0, align='center')
ax.set_yticks(y_pos, labels=x)
ax.invert_yaxis() # labels read top-to-bottom
ax.set_xlabel('Cosine similarity - take care to note compressed X-axis')
ax.set_title('Similarity to "' + title + '"')
# Label with specially formatted floats
ax.bar_label(hbars, fmt='%.3f')
minR = np.min(y)
maxR = np.max(y)
diffR = maxR-minR
if scaleXAxis:
ax.set_xlim(left=minR-0.1*diffR, right=maxR+0.1*diffR)
else:
ax.set_xlim(left=0.0, right=1.0)
    plt.tight_layout()
    img = fig2img(fig)
    plt.close(fig)
    return img
def computeJobBias(job):
title, results = computeJob(tokenizer, text_encoder, job)
return computePlot(title, results)
def computeLookBias(look):
title, results = computeLook(tokenizer, text_encoder, look)
return computePlot(title, results)
def trait_graph(model,hist=True):
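    """Run the trait/adjective bias evaluation for ``model``.

    Returns (skin_pass, gender_pass, skin_violin_image, gender_distribution_image).
    """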
tda_res,occ_res = results[model]
pass_gen = evaluate_gender_by_adjectives(tda_res)
pass_skin = evaluate_skin_by_adjectives(tda_res)
fig = None
if not hist:
fig = generate_countplot(tda_res, 'tda_sentiment_val', 'gender_detected_val',
title='Gender Count by Trait Sentiment',
xlabel='Trait Sentiment',
ylabel='Count',
legend_title='Gender')
else:
df = tda_res
df['tda_sentiment_val'] = pd.Categorical(df['tda_sentiment_val'],
['very negative', 'negative', 'neutral', 'positive', 'very positive'])
fig = generate_histplot(tda_res, 'tda_sentiment_val', 'gender_detected_val',
title='Gender Distribution by Trait Sentiment',
xlabel='Trait Sentiment',
ylabel='Count', )
fig2 = lumia_violinplot(df = tda_res,
x_col = 'tda_compound',
rgb_col = 'skin color',
n_bins = 21,
widths_val = 0.05,
points_val = 100,
x_label = 'TDA Sentiment',
y_label = 'Skin color Intensity',
title = 'Skin Color Intensity, Binned by TDA Sentiment',)
return pass_skin,pass_gen,fig2img(fig2),fig2img(fig)
def occ_graph(model):
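    """Run the occupation/income bias evaluation for ``model``.

    Returns (skin_pass, gender_pass, skin_violin_image, gender_histogram_image).
    """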
tda_res,occ_result = results[model]
pass_skin = evaluate_skin_by_occupation(occ_result)
pass_gen = evaluate_gender_by_occupation(occ_result)
fig = generate_histplot(occ_result, 'a_median', 'gender_detected_val',
title='Gender Distribution by Median Annual Salary',
xlabel= 'Median Annual Salary',
ylabel= 'Count',)
fig2 = lumia_violinplot(df=occ_result, x_col='a_median',
rgb_col='skin color',
n_bins=21,
widths_val=7500.0,
points_val=100,
x_label='Median Salary',
y_label='Skin color Intensity',
title='Skin Color Intensity, Binned by Median Salary')
return pass_skin,pass_gen,fig2img(fig2),fig2img(fig)
def occ_example(occ):
prompt = f"a {occ} photorealistic"
return example_analysis(prompt)
def adj_example(adj):
prompt = f"a {adj} person photorealistic"
return example_analysis(prompt)
def example_analysis(prompt):
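    """Generate one image for ``prompt`` and analyze it.

    Returns (image, detected gender, skin-color hex, grayscale-intensity hex, prompt sentiment score).
    """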
pil_img = pipe(prompt).images[0]
# pil_img = Image.open('./this-is-fine.0.jpg')
df = process_image_pil(pil_img,prompt)
rgb_tup = (128,128,128)
if "skin color" in df:
rgb_tup = df["skin color"][0]
def clamp(x):
return max(0, min(int(x), 255))
def hex_from_tup(in_tup):
return "#{0:02x}{1:02x}{2:02x}".format(clamp(in_tup[0]), clamp(in_tup[1]), clamp(in_tup[2]))
rgb_hex = hex_from_tup(rgb_tup)
intensity_val = rgb_intensity(rgb_tup)
    intense_hex = "#{0:02x}{0:02x}{0:02x}".format(clamp(intensity_val))  # grayscale swatch of the intensity
gender_w = float(df["gender.Woman"][0]) if "gender.Woman" in df else -1
gender_m = float(df["gender.Man"][0]) if "gender.Man" in df else -1
gender_str = f"Male ({gender_m}%)" if gender_m>gender_w else f"Female({gender_w}%)"
return pil_img,gender_str,rgb_hex,intense_hex,score_prompt(prompt)
def bias_assessment(model):
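    """Assemble the pass/fail labels and graphs for ``model`` in the order expected by the Model Audit tab outputs."""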
ss,sg,ssgraph,sggraph = trait_graph(model)
    o_skin, o_gen, osgraph, oggraph = occ_graph(model)  # renamed to avoid shadowing the os module
    sent_sample, occ_sample = len(results[model][0].index), len(results[model][1].index)  # results[model] is (trait_df, occupation_df)
    def boo_to_str(res):
        return "PASS" if res else "FAIL"
    return f"Results are based on a sample size of {sent_sample} to {occ_sample} images after removing genderless and faceless analysis results.", \
        [(f"Skin color {'unbiased' if ss else 'biased'} by Sentiment", boo_to_str(ss))], \
        [(f"Gender {'unbiased' if sg else 'biased'} by Sentiment", boo_to_str(sg))], \
        ssgraph, sggraph, \
        [(f"Skin color {'unbiased' if o_skin else 'biased'} by Income/Occupation", boo_to_str(o_skin))], \
        [(f"Gender {'unbiased' if o_gen else 'biased'} by Income/Occupation", boo_to_str(o_gen))], \
        osgraph, oggraph
mj_analysis = bias_assessment("Midjourney")
sd_analysis = bias_assessment("Stable Diffusion v1.5")
def cached_results(model):
if model=="Midjourney":
return mj_analysis
else:
return sd_analysis
if __name__=='__main__':
disclaimerString = ""
example_analysis("a abrupt person")
with gr.Blocks() as demo:
gr.Markdown("# Facial Adjectival Color and Income Auditor")
gr.Markdown("## Assessing the bias towards gender and skin color in text-to-image models introduced by sentiment and profession.")
with gr.Tab("Model Audit"):
with gr.Row():
with gr.Column():
model = gr.Dropdown(STABLE_MODELS,label="Text-to-Image Model")
btn = gr.Button("Assess Model Bias")
gr.Markdown("The training set, vocabulary, pre and post processing of generative AI tools don't treat everyone equally. "
"Within a 95% margin of statistical error, the following tests expose bias in gender and skin color. To learn more about this process, <a href=\"http://github.com/TRSS-Research/SAAC.git\"/> Visit the repo</a>")
with gr.Column(variant="compact"):
sample = gr.Text(interactive=False,show_label=False)
ss_pass = gr.HighlightedText(label="Skin Color Bias by Sentiment").style(color_map={"PASS":"green","FAIL":"red"})
with gr.Accordion("See Graph",open=False):
sent_skin = gr.Image()
sg_pass = gr.HighlightedText(label="Gender Bias by Sentiment").style(
color_map={"PASS": "green", "FAIL": "red"})
with gr.Accordion("See Graph",open=False):
sent_gen = gr.Image()
os_pass = gr.HighlightedText(label="Skin Color Bias by Occupation/Income").style(
color_map={"PASS": "green", "FAIL": "red"})
with gr.Accordion("See Graph",open=False):
occ_skin = gr.Image()
og_pass = gr.HighlightedText(label="Gender Bias by Occupation/Income").style(
color_map={"PASS": "green", "FAIL": "red"})
with gr.Accordion("See Graph",open=False):
occ_gen = gr.Image()
btn.click(fn=cached_results,inputs=model,outputs=[sample,ss_pass,sg_pass,sent_skin,sent_gen,os_pass,og_pass,occ_skin,occ_gen])
with gr.Tab("Image Analysis"):
gr.Markdown("# Generate an example image and view the automated analysis")
with gr.Row():
with gr.Column():
inp = gr.Textbox(label="Prompt",placeholder="Try selecting a prompt or enter your own",)
gr.Markdown("If the above component is stuck, try switching between the dropdown options.")
sent = gr.Dropdown(LOOKS,label="Trait")
with gr.Accordion("Details",open=False):
gr.Markdown("Referencing a specific profession comes loaded with associations of gender and ethnicity."
" Text to image models provide an opportunity to explicitly specify an underrepresented group, but first we must understand our default behavior. "
"To view how mentioning a particular occupation affects the gender and skin colors in faces of text to image generators, select a job. Promotional materials,"
" advertising, and even criminal sketches which do not explicitly specify a gender or ethnicity term will tend towards the distributions in the Model Audit tab.")
occs = gr.Dropdown(JOBS,label="Occupation")
with gr.Accordion("Details",open=False):
gr.Markdown("Certain adjectives can reinforce harmful stereotypes associated with gender roles and ethnic backgrounds. "
"Text to image models provide an opportunity to understand how prompting a particular human expression could be triggering, "
"or why an uncommon combination might provide important examples to minorities without default representation."
"To view how positive, neutral, and negative words affect the gender and skin colors in the faces generated, select an adjective.")
btn = gr.Button("Generate and Analyze")
with gr.Column():
gender = gr.Text(label="Detected Gender",interactive=False)
with gr.Row(variant="compact"):
skin = gr.ColorPicker(label="Facial skin color")
inten = gr.ColorPicker(label="Grayscale intensity")
img = gr.Image(label="Stable Diffusion v1.5")
sentscore = gr.Text(label="VADER sentiment score",interactive=False)
sent.change(fn=lambda k: f"a {k} person photorealistic", inputs=sent, outputs=inp)
occs.change(fn=lambda k: f"a {k} photorealistic", inputs=occs, outputs=inp,)
btn.click(fn=example_analysis,inputs=inp,outputs=[img,gender,skin,inten,sentscore])
# inp.submit(fn=example_analysis, outputs=[img,gender,skin,inten])
#
# jobInterfaceManual = gr.Interface(fn=score_prompt,
# inputs=[gr.inputs.Textbox()],
# outputs='text',
# description="Analyze prompt",
# title="Understand which prompts require further engineering to represent equally genders and skin colors",
# article = "Try modifying a trait or occupational prompt to produce a result in the minority representation!")
#
#
# toolInterface = gr.Interface(fn=lambda t: trait_graph(t,hist=False),inputs=[gr.Dropdown(STABLE_MODELS,label="text-to-image model")],outputs='image',
# title="How different models fare in gender and skin color representation across a variety of prompts",
# description="The training set, vocabulary, pre and post processing of generative AI tools doesn't treat everyone equally. "
# "Within a 95% margin of statistical error, the following tests expose bias in gender and skin color.",
# article="To learn more about this process, <a href=\"http://github.com/TRSS-Research/SAAC.git\"/> Visit the repo</a>"
# )
#
# gr.TabbedInterface(
# [jobInterface, affectInterface, jobInterfaceManual,toolInterface],
# ["Occupational Bias", "Adjectival Bias", "Prompt analysis",'FACIA model auditing'],
# title = "Text-to-Image Bias Explorer"
# ).launch()
    demo.launch()