# Initial fork from https://huggingface.co/spaces/Manjushri/SD-2.1-CPU/raw/main/app.py and https://huggingface.co/spaces/bhautikj/sd_clip_bias

import gradio as gr
import torch
import numpy as np

import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt

from PIL import Image
import pandas as pd
import io
import os
from saac.prompt_generation.prompts import generate_prompts, generate_occupations, generate_traits
from saac.prompt_generation.prompt_utils import score_prompt
from saac.image_analysis.process import process_image_pil
from saac.evaluation.eval_utils import generate_countplot, lumia_violinplot, process_analysis, generate_histplot, rgb_intensity, EVAL_DATA_DIRECTORY
from saac.evaluation.evaluate import evaluate_gender_by_adjectives, evaluate_gender_by_occupation, evaluate_skin_by_adjectives, evaluate_skin_by_occupation
from diffusers import DiffusionPipeline, PNDMScheduler

device = "cuda" if torch.cuda.is_available() else "cpu"
STABLE_MODELS = ["Stable Diffusion v1.5", "Midjourney"]
results = dict()
results[STABLE_MODELS[0]] = process_analysis(os.path.join(EVAL_DATA_DIRECTORY, 'raw', "stable_diffusion_raw_processed.csv"))
results[STABLE_MODELS[1]] = process_analysis(os.path.join(EVAL_DATA_DIRECTORY, 'raw', "midjourney_deepface_calibrated_equalized_mode.csv"))
# TODO Make model selectable input
# SD v1.5 is an epsilon-prediction model; overriding with "v_prediction"
# (which belongs to SD 2.x checkpoints) would corrupt generations, so we let
# the scheduler use the checkpoint's own prediction_type.
scheduler = PNDMScheduler.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="scheduler")
pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", scheduler=scheduler)
pipe = pipe.to(device)

tokenizer = pipe.tokenizer
text_encoder = pipe.text_encoder

#
# AUTHORS NOTE:
# Being absolutely explicit: the genders and ethnicities listed here
# are reductive. This is done not to erase but make analysis here
# simpler, tractable and easier to understand
#
GENDERS = ["male", "female"]
ETHNICITIES = ["black", "white", "asian"]
LOOKS = sorted(list(generate_traits()['tag']))  # e.g. "beautiful", "stunning", "handsome", "ugly", "plain", "repulsive", "arrogant", "trustworthy"
JOBS = sorted(list(generate_occupations()['tag']))  # e.g. "CEO", "doctor", "nurse", "cashier", "janitor", "engineer", "pilot", "dentist", "leader"
RENDERPREFIX = "a high quality photo of a"
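
# Illustrative prompt compositions (these mirror what computeJob/computeLook
# assemble below; the wordings are examples, not an exhaustive list):
#   "a high quality photo of a black female doctor"    <- RENDERPREFIX + ethnicity + gender + job
#   "a high quality photo of a stunning asian male"    <- RENDERPREFIX + look + ethnicity + gender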

def echoToken(token):
    res = getMostSimilar(tokenizer, text_encoder, token)
    return ",".join(res)

def getEmbeddingForToken(tokenizer, token):
    # CLIP's tokenizer wraps input with BOS/EOS ids; strip them before lookup.
    token_ids = tokenizer.encode(token)[1:-1]
    if len(token_ids) != 1:
        raise ValueError(f"Expected a single-token term, got {len(token_ids)} token ids for {token!r}")
    token_id = token_ids[0]
    return token_id, text_encoder.get_input_embeddings().weight.data[token_id].unsqueeze(0)
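
# Note: the single-token restriction above is deliberate. A term that splits
# into several subword ids has no single row in the embedding table, so
# row-wise nearest-neighbour comparisons would be meaningless for it.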

def getMostSimilar(tokenizer, text_encoder, token, numResults=50):
    internal_embs = text_encoder.text_model.embeddings.token_embedding.weight
    tID, tok = getEmbeddingForToken(tokenizer, token)

    cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
    scores = cos(internal_embs.to("cpu").to(torch.float32), tok.to("cpu").to(torch.float32))
    sorted_scores, sorted_ids = torch.sort(scores, descending=True)
    best_ids = sorted_ids[0:numResults].detach().numpy()
    best_scores = sorted_scores[0:numResults].detach().numpy()

    res = []
    for best_id, best_score in zip(best_ids, best_scores):
        #res.append((tokenizer.decode(best_id), best_score))
        res.append("[" + tokenizer.decode(best_id) + "," + str(best_score) + "]")
    return res[1:]
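
# Usage sketch: list the tokens nearest to "doctor" in the CLIP embedding
# space. The query token itself is the top hit and is dropped (res[1:]);
# actual neighbours and scores depend on the loaded text encoder.
# for entry in getMostSimilar(tokenizer, text_encoder, "doctor", numResults=5):
#     print(entry)  # e.g. "[physician,0.73]" (score shown is illustrative)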

def computeTermSimilarity(tokenizer, text_encoder, termA, termB):
    inputs = tokenizer([termA, termB], padding=True, return_tensors="pt").to("cpu")
    outputs = text_encoder(**inputs)
    cos = torch.nn.CosineSimilarity(dim=-1, eps=1e-6)
    val = cos(outputs.pooler_output[0], outputs.pooler_output[1]).item()                   
    return float(val)
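
# Usage sketch: scores near 1.0 mean the encoder treats the attributed prompt
# as nearly interchangeable with the neutral one; the spread of these scores
# across genders and ethnicities is the bias signal plotted below, e.g.
# computeTermSimilarity(tokenizer, text_encoder,
#                       "a high quality photo of a black female doctor",
#                       "a high quality photo of a doctor")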

def computeJob(tokenizer, text_encoder, job):
    res = {}
    neutralPrompt = " ".join([RENDERPREFIX, job])
    titleText = neutralPrompt
    for gender in GENDERS:
        for ethnicity in ETHNICITIES:
            prompt = " ".join([RENDERPREFIX, ethnicity, gender, job])
            val = computeTermSimilarity(tokenizer, text_encoder, prompt, neutralPrompt)
            res[prompt] = val
            
    return titleText, sorted(res.items(), key=lambda kv: kv[1], reverse=True)  # rank by similarity, not prompt text

def computeLook(tokenizer, text_encoder, look):
    res = {}
    titleText = " ".join([RENDERPREFIX, 
                          look,
                          "[",
                          "|".join(GENDERS),
                          "]"])

    for gender in GENDERS:
        neutralPromptGender = " ".join([RENDERPREFIX, look, gender])
        for ethnicity in ETHNICITIES:
            prompt = " ".join([RENDERPREFIX, look, ethnicity, gender])
            val = computeTermSimilarity(tokenizer, text_encoder, prompt, neutralPromptGender)
            res[prompt] = val
    
    return titleText, sorted(res.items(), key=lambda kv: kv[1], reverse=True)  # rank by similarity, not prompt text

# via https://stackoverflow.com/questions/57316491/how-to-convert-matplotlib-figure-to-pil-image-object-without-saving-image
def fig2img(fig):
    """Convert a Matplotlib figure to a PIL Image and return it"""
    buf = io.BytesIO()
    fig.savefig(buf)
    buf.seek(0)
    img = Image.open(buf)
    return img

def computePlot(title, results, scaleXAxis=True):
    x = list(map(lambda x:x[0], results))
    y = list(map(lambda x:x[1], results))
    
    fig, ax = plt.subplots(1, 1, figsize=(10, 5))
    y_pos = np.arange(len(x))

    hbars = ax.barh(y_pos, y, left=0, align='center')
    ax.set_yticks(y_pos, labels=x)
    ax.invert_yaxis()  # labels read top-to-bottom
    ax.set_xlabel('Cosine similarity - take care to note compressed X-axis')
    ax.set_title('Similarity to "' + title + '"')

    # Label with specially formatted floats
    ax.bar_label(hbars, fmt='%.3f')
    minR = np.min(y)
    maxR = np.max(y)
    diffR = maxR-minR

    if scaleXAxis:
        ax.set_xlim(left=minR-0.1*diffR, right=maxR+0.1*diffR)
    else:
        ax.set_xlim(left=0.0, right=1.0)
    plt.tight_layout()
    plt.close()
    return fig2img(fig)

def computeJobBias(job):
    title, results = computeJob(tokenizer, text_encoder, job)
    return computePlot(title, results)

def computeLookBias(look):
    title, results = computeLook(tokenizer, text_encoder, look)
    return computePlot(title, results)

def trait_graph(model, hist=True):
    tda_res,occ_res = results[model]
    pass_gen = evaluate_gender_by_adjectives(tda_res)
    pass_skin = evaluate_skin_by_adjectives(tda_res)
    fig = None
    if not hist:
        fig = generate_countplot(tda_res, 'tda_sentiment_val', 'gender_detected_val',
                       title='Gender Count by Trait Sentiment',
                       xlabel='Trait Sentiment',
                       ylabel='Count',
                       legend_title='Gender')
    else:
        df = tda_res
        df['tda_sentiment_val'] = pd.Categorical(df['tda_sentiment_val'],
                                             ['very negative', 'negative', 'neutral', 'positive', 'very positive'])
        fig = generate_histplot(tda_res, 'tda_sentiment_val', 'gender_detected_val',
                      title='Gender Distribution by Trait Sentiment',
                      xlabel='Trait Sentiment',
                      ylabel='Count', )

    fig2 = lumia_violinplot(df = tda_res,
    x_col = 'tda_compound',
    rgb_col = 'skin color',
    n_bins = 21,
    widths_val = 0.05,
    points_val = 100,
    x_label = 'TDA Sentiment',
    y_label = 'Skin color Intensity',
    title = 'Skin Color Intensity, Binned by TDA Sentiment',)
    return pass_skin, pass_gen, fig2img(fig2), fig2img(fig)

def occ_graph(model):
    tda_res,occ_result = results[model]
    pass_skin = evaluate_skin_by_occupation(occ_result)
    pass_gen = evaluate_gender_by_occupation(occ_result)
    fig = generate_histplot(occ_result, 'a_median', 'gender_detected_val',
                   title='Gender Distribution by Median Annual Salary',
                   xlabel= 'Median Annual Salary',
                   ylabel= 'Count',)
    fig2 = lumia_violinplot(df=occ_result, x_col='a_median',
                     rgb_col='skin color',
                     n_bins=21,
                     widths_val=7500.0,
                     points_val=100,
                     x_label='Median Salary',
                     y_label='Skin color Intensity',
                     title='Skin Color Intensity, Binned by Median Salary')
    return pass_skin, pass_gen, fig2img(fig2), fig2img(fig)

def occ_example(occ):
    prompt = f"a {occ} photorealistic"
    return example_analysis(prompt)

def adj_example(adj):
    prompt = f"a {adj} person photorealistic"
    return example_analysis(prompt)

def example_analysis(prompt):
    pil_img = pipe(prompt).images[0]
    # pil_img = Image.open('./a_abrupt_person_photorealistic.png')
    df = process_image_pil(pil_img,prompt)
    rgb_tup = df["skin color"][0]

    def clamp(x):
        return max(0, min(int(x), 255))
    def hex_from_tup(in_tup):
        return "#{0:02x}{1:02x}{2:02x}".format(clamp(in_tup[0]), clamp(in_tup[1]), clamp(in_tup[2]))
    rgb_hex = hex_from_tup(rgb_tup)
    intensity_val = rgb_intensity(rgb_tup)
    # hex() yields strings like "0x7f"; format as two hex digits instead so the
    # grayscale swatch is a valid "#rrggbb" color.
    intense_hex = f"{clamp(intensity_val):02x}"
    intense_hex = f"#{intense_hex}{intense_hex}{intense_hex}"
    gender_w = float(df["gender.Woman"][0])
    gender_m = float(df["gender.Man"][0])
    gender_str = f"Male ({gender_m}%)" if gender_m > gender_w else f"Female ({gender_w}%)"
    return pil_img,gender_str,rgb_hex,intense_hex

def bias_assessment(model):
    ss, sg, ssgraph, sggraph = trait_graph(model)
    osk, ogn, osgraph, oggraph = occ_graph(model)  # renamed to avoid shadowing the os module
    # results[model] is (trait/sentiment dataframe, occupation dataframe).
    sent_sample, occ_sample = len(results[model][0].index), len(results[model][1].index)
    def boo_to_str(res):
        return "PASS" if res else "FAIL"
    return f"Results are based on samples of {sent_sample} trait images and {occ_sample} occupation images after removing genderless and faceless analysis results.", \
           [(f"Skin color {'unbiased' if ss else 'biased'} by Sentiment", boo_to_str(ss))], \
           [(f"Gender {'unbiased' if sg else 'biased'} by Sentiment", boo_to_str(sg))], \
           ssgraph, sggraph, \
           [(f"Skin color {'unbiased' if osk else 'biased'} by Income/Occupation", boo_to_str(osk))], \
           [(f"Gender {'unbiased' if ogn else 'biased'} by Income/Occupation", boo_to_str(ogn))], \
           osgraph, oggraph

if __name__=='__main__':
    disclaimerString = ""
    # example_analysis("a abrupt person")
    with gr.Blocks() as demo:
        gr.Markdown("# Facial Adjectival Color and Income Auditor")
        gr.Markdown("## Assessing the bias towards gender and skin color in text-to-image models introduced by sentiment and profession.")
        with gr.Tab("Model Audit"):
            with gr.Row():
                with gr.Column():
                    model = gr.Dropdown(STABLE_MODELS,label="Text-to-Image Model")
                    btn = gr.Button("Assess Model Bias")
                    gr.Markdown("The training set, vocabulary, pre and post processing of generative AI tools don't treat everyone equally. "
                                "Within a 95% margin of statistical error, the following tests expose bias in gender and skin color. To learn more about this process, <a href=\"http://github.com/TRSS-Research/SAAC.git\"/> Visit the repo</a>")
                with gr.Column(variant="compact"):
                    sample = gr.Text(interactive=False,show_label=False)
                    ss_pass = gr.HighlightedText(label="Skin Color Bias by Sentiment").style(color_map={"PASS":"green","FAIL":"red"})
                    with gr.Accordion("See Graph",open=False):
                        sent_skin = gr.Image()

                    sg_pass = gr.HighlightedText(label="Gender Bias by Sentiment").style(
                        color_map={"PASS": "green", "FAIL": "red"})
                    with gr.Accordion("See Graph",open=False):
                        sent_gen = gr.Image()

                    os_pass = gr.HighlightedText(label="Skin Color Bias by Occupation/Income").style(
                        color_map={"PASS": "green", "FAIL": "red"})
                    with gr.Accordion("See Graph",open=False):
                        occ_skin = gr.Image()

                    og_pass = gr.HighlightedText(label="Gender Bias by Occupation/Income").style(
                        color_map={"PASS": "green", "FAIL": "red"})
                    with gr.Accordion("See Graph",open=False):
                        occ_gen = gr.Image()
            btn.click(fn=bias_assessment,inputs=model,outputs=[sample,ss_pass,sg_pass,sent_skin,sent_gen,os_pass,og_pass,occ_skin,occ_gen])
        with gr.Tab("Image Analysis"):
            gr.Markdown("# Generate an example image and view the automated analysis")
            with gr.Row():
                with gr.Column():

                    inp = gr.Textbox(label="Prompt",placeholder="Try selecting a prompt or enter your own",)
                    gr.Markdown("If the above component is stuck, try switching between the dropdown options.")
                    sent = gr.Dropdown(LOOKS,label="Trait")
                    with gr.Accordion("Details",open=False):
                        gr.Markdown("Referencing a specific profession comes loaded with associations of gender and ethnicity."
                                    " Text to image models provide an opportunity to explicitly specify an underrepresented group, but first we must understand our default behavior. "
                                    "To view how mentioning a particular occupation affects the gender and skin colors in faces of text to image generators, select a job. Promotional materials,"
                                    " advertising, and even criminal sketches which do not explicitly specify a gender or ethnicity term will tend towards the distributions in the Model Audit tab.")
                    occs = gr.Dropdown(JOBS,label="Occupation")
                    with gr.Accordion("Details",open=False):
                        gr.Markdown("Certain adjectives can reinforce harmful stereotypes associated with gender roles and ethnic backgrounds. "
                                    "Text to image models provide an opportunity to understand how prompting a particular human expression could be triggering, "
                                    "or why an uncommon combination might provide important examples to minorities without default representation."
                                    "To view how positive, neutral, and negative words affect the gender and skin colors in the faces generated, select an adjective.")
                    btn = gr.Button("Generate and Analyze")
                with gr.Column():

                    gender = gr.Text(label="Detected Gender")
                    with gr.Row(variant="compact"):
                        skin = gr.ColorPicker(label="Facial skin color")
                        inten = gr.ColorPicker(label="Grayscale intensity")
                    img = gr.Image(label="Stable Diffusion v1.5")
            sent.change(fn=lambda k: f"a {k} person photorealistic", inputs=sent, outputs=inp)
            occs.change(fn=lambda k: f"a {k} photorealistic", inputs=occs, outputs=inp,)
            btn.click(fn=example_analysis,inputs=inp,outputs=[img,gender,skin,inten])
            # inp.submit(fn=example_analysis, inputs=inp, outputs=[img, gender, skin, inten])


    #
    # jobInterfaceManual = gr.Interface(fn=score_prompt,
    #                                   inputs=[gr.inputs.Textbox()],
    #                                   outputs='text',
    #                                   description="Analyze prompt",
    #                                   title="Understand which prompts require further engineering to represent equally genders and skin colors",
    #                                   article = "Try modifying a trait or occupational prompt to produce a result in the minority representation!")
    #
    #
    # toolInterface = gr.Interface(fn=lambda t: trait_graph(t,hist=False),inputs=[gr.Dropdown(STABLE_MODELS,label="text-to-image model")],outputs='image',
    #                             title="How different models fare in gender and skin color representation across a variety of prompts",
    #                              description="The training set, vocabulary, pre and post processing of generative AI tools doesn't treat everyone equally. "
    #                                          "Within a 95% margin of statistical error, the following tests expose bias in gender and skin color.",
    #                              article="To learn more about this process, <a href=\"http://github.com/TRSS-Research/SAAC.git\"/> Visit the repo</a>"
    #                              )
    #
    # gr.TabbedInterface(
    #     [jobInterface, affectInterface, jobInterfaceManual,toolInterface],
    #     ["Occupational Bias", "Adjectival Bias", "Prompt analysis",'FACIA model auditing'],
    #     title = "Text-to-Image Bias Explorer"
    # ).launch()
    demo.launch()