Anonymous Authors committed on
Commit e79434d
1 Parent(s): 7cf5700

Create app.py

Files changed (1)
  1. app.py +289 -0
app.py ADDED
@@ -0,0 +1,289 @@
+ import json
+ import gradio as gr
+ import os
+ from PIL import Image
+ import plotly.graph_objects as go
+ import plotly.express as px
+ import operator
+
+ TITLE = "Identity Representation in Diffusion Models"
+
+ _INTRO = """
+ # Identity Representation in Diffusion Models
+
+ Explore the data generated from [DiffusionBiasExplorer](https://huggingface.co/spaces/tti-bias/DiffusionBiasExplorer)!
+ This demo showcases patterns in images generated by the Stable Diffusion and Dall-E 2 systems.
+ Specifically, images obtained from prompt inputs that span various gender- and ethnicity-related terms are clustered to show how those terms shape visual representations (more details below).
+ We encourage users to take advantage of this app to explore those trends, for example through the lens of the following questions:
+ - Find the cluster that has the most prompts denoting a gender or ethnicity that you identify with. Do you think the generated images look like you?
+ - Find two clusters that have a similar distribution of gender terms but different distributions of ethnicity terms. Do you see any meaningful differences in how gender is visually represented?
+ - Do you find that some ethnicity terms lead to more stereotypical visual representations than others?
+ - Do you find that some gender terms lead to more stereotypical visual representations than others?
+
+ These questions only scratch the surface of what we can learn from demos like this one;
+ let us know what you find [in the discussions tab](https://huggingface.co/spaces/tti-bias/DiffusionFaceClustering/discussions),
+ or if you think of other relevant questions!
+ """
+
+ _CONTEXT = """
+ ##### How do diffusion-based models represent gender and ethnicity?
+
+ In order to evaluate the *social biases* that Text-to-Image (TTI) systems may reproduce or exacerbate,
+ we first need to understand how the visual representations they generate relate to notions of gender and ethnicity.
+ These two aspects of a person's identity are, however, known as **socially constructed characteristics**:
+ that is to say, gender and ethnicity only exist in interactions between people; they do not have an independent existence based solely on physical (or visual) attributes.
+ This means that while we can characterize trends in how the models associate visual features with specific *identity terms in the generation prompts*,
+ we should not assign a specific gender or ethnicity to a synthetic figure generated by an ML model.
+
+ In this app, we instead take a two-step clustering-based approach. First, we generate 680 images for each model by varying mentions of terms that denote gender or ethnicity in the prompts.
+ Then, we use a [VQA-based model](https://huggingface.co/Salesforce/blip-vqa-base) to cluster these images at different granularities (12, 24, or 48 clusters).
+ Exploring these clusters allows us to examine trends in the models' associations between visual features and the textual representation of social attributes.
+
+ **Note:** this demo was developed with a limited set of gender- and ethnicity-related terms that are most relevant to the US context as a first approach,
+ so users may not always find themselves represented.
+ """
+
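+ # Pre-computed cluster assignments at three granularities. Each cluster entry is
+ # expected to provide an "img_path_list" along with "labels_model", "labels_gender",
+ # and "labels_ethnicity" lists of (term, count) pairs sorted by frequency, as used below.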
+ clusters_12 = json.load(open("clusters/id_all_blip_clusters_12.json"))
+ clusters_24 = json.load(open("clusters/id_all_blip_clusters_24.json"))
+ clusters_48 = json.load(open("clusters/id_all_blip_clusters_48.json"))
+
+ clusters_by_size = {
+     12: clusters_12,
+     24: clusters_24,
+     48: clusters_48,
+ }
+
+
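+ # Map the short labels stored in the cluster files to human-readable display strings.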
+ def to_string(label):
+     if label == "SD_2":
+         label = "Stable Diffusion 2.0"
+     elif label == "SD_14":
+         label = "Stable Diffusion 1.4"
+     elif label == "DallE":
+         label = "Dall-E 2"
+     elif label == "non-binary":
+         label = "non-binary person"
+     elif label == "person":
+         label = "<i>unmarked</i> (person)"
+     elif label == "":
+         label = "<i>unmarked</i> ()"
+     elif label == "gender":
+         label = "gender term"
+     return label
+
+
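+ # Attach a one-line dropdown description ("sentence_desc") and a short text summary
+ # ("summary_desc") to each cluster, built from its most frequent gender and ethnicity terms.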
+ def summarize_clusters(clusters_list, max_terms=3):
+     for cl_id, cl_dict in enumerate(clusters_list):
+         total = len(cl_dict["img_path_list"])
+         gdr_list = cl_dict["labels_gender"]
+         eth_list = cl_dict["labels_ethnicity"]
+         cl_dict["sentence_desc"] = (
+             f"Cluster {cl_id} | \t"
+             + f"gender terms incl.: {gdr_list[0][0].replace('person', 'unmarked(gender)')}"
+             + (
+                 f" - {gdr_list[1][0].replace('person', 'unmarked(gender)')} | "
+                 if len(gdr_list) > 1
+                 else " | "
+             )
+             + f"ethnicity terms incl.: {'unmarked(ethnicity)' if eth_list[0][0] == '' else eth_list[0][0]}"
+             + (
+                 f" - {'unmarked(ethnicity)' if eth_list[1][0] == '' else eth_list[1][0]}"
+                 if len(eth_list) > 1
+                 else ""
+             )
+         )
+         cl_dict["summary_desc"] = (
+             f"Cluster {cl_id} has {total} images.\n"
+             + f"- The most represented gender terms are {gdr_list[0][0].replace('person', 'unmarked')} ({gdr_list[0][1]})"
+             + (
+                 f" and {gdr_list[1][0].replace('person', 'unmarked')} ({gdr_list[1][1]}).\n"
+                 if len(gdr_list) > 1
+                 else ".\n"
+             )
+             + f"- The most represented ethnicity terms are {'unmarked' if eth_list[0][0] == '' else eth_list[0][0]} ({eth_list[0][1]})"
+             + (
+                 f" and {'unmarked' if eth_list[1][0] == '' else eth_list[1][0]} ({eth_list[1][1]}).\n"
+                 if len(eth_list) > 1
+                 else ".\n"
+             )
+             + "See below for a more detailed description."
+         )
+
+
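+ # Pre-compute the cluster descriptions for every granularity.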
+ for _, clusters_list in clusters_by_size.items():
+     summarize_clusters(clusters_list)
+
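+ # For each granularity, the list of one-line cluster descriptions used as dropdown choices.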
+ dropdown_descs = dict(
+     (num_clusters, [cl_dct["sentence_desc"] for cl_dct in clusters_list])
+     for num_clusters, clusters_list in clusters_by_size.items()
+ )
+
+
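+ # Build an HTML description of how one attribute (system, gender, or ethnicity terms)
+ # is distributed within a cluster, listing the top entries as percentages.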
+ def describe_cluster(cl_dict, block="label", max_items=4):
+     labels_values = sorted(cl_dict.items(), key=operator.itemgetter(1))
+     labels_values.reverse()
+     total = float(sum(cl_dict.values()))
+     lv_prcnt = list(
+         (item[0], round(item[1] * 100 / total, 0)) for item in labels_values
+     )
+     top_label = lv_prcnt[0][0]
+     description_string = (
+         "<span>The most represented %s is <b>%s</b>, making up about <b>%d%%</b> of the cluster.</span>"
+         % (to_string(block), to_string(top_label), lv_prcnt[0][1])
+     )
+     description_string += "<p>This is followed by: "
+     for lv in lv_prcnt[1 : min(len(lv_prcnt), 1 + max_items)]:
+         description_string += "<BR/><b>%s:</b> %d%%" % (to_string(lv[0]), lv[1])
+     if len(lv_prcnt) > max_items + 1:
+         description_string += "<BR/><b> - Other terms:</b> %d%%" % (
+             sum(lv[1] for lv in lv_prcnt[max_items + 1 :]),
+         )
+     description_string += "</p>"
+     return description_string
+
+
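+ # Collect everything displayed for one cluster: its text summary, the three makeup
+ # plots (system, gender, ethnicity) with their HTML descriptions, a small image
+ # gallery, and the refreshed dropdown choices for the selected granularity.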
+ def show_cluster(cl_id, num_clusters):
+     if not cl_id:
+         cl_id = 0
+     else:
+         # The dropdown passes the cluster's description string; convert it back to an index.
+         cl_id = (
+             dropdown_descs[num_clusters].index(cl_id)
+             if cl_id in dropdown_descs[num_clusters]
+             else 0
+         )
+     if not num_clusters:
+         num_clusters = 12
+     cl_dct = clusters_by_size[num_clusters][cl_id]
+     images = []
+     for i in range(8):
+         # Rebuild a local image path from the stored location by dropping its leading URL segments.
+         img_path = "/".join(
+             [st.replace("/", "") for st in cl_dct["img_path_list"][i].split("//")][3:]
+         )
+         im = Image.open(img_path)
+         # .resize((256, 256))
+         caption = (
+             "_".join([img_path.split("/")[0], img_path.split("/")[-1]])
+             .replace("Photo_portrait_of_an_", "")
+             .replace("Photo_portrait_of_a_", "")
+             .replace("SD_v2_random_seeds_identity_", "(SD v.2) ")
+             .replace("dataset-identities-dalle2_", "(Dall-E 2) ")
+             .replace("SD_v1.4_random_seeds_identity_", "(SD v.1.4) ")
+             .replace("_", " ")
+         )
+         images.append((im, caption))
+     model_fig = go.Figure()
+     model_fig.add_trace(
+         go.Pie(
+             labels=list(dict(cl_dct["labels_model"]).keys()),
+             values=list(dict(cl_dct["labels_model"]).values()),
+         )
+     )
+     model_description = describe_cluster(dict(cl_dct["labels_model"]), "system")
+
+     gender_fig = go.Figure()
+     gender_fig.add_trace(
+         go.Pie(
+             labels=list(dict(cl_dct["labels_gender"]).keys()),
+             values=list(dict(cl_dct["labels_gender"]).values()),
+         )
+     )
+     gender_description = describe_cluster(dict(cl_dct["labels_gender"]), "gender")
+
+     ethnicity_fig = go.Figure()
+     ethnicity_fig.add_trace(
+         go.Bar(
+             x=list(dict(cl_dct["labels_ethnicity"]).keys()),
+             y=list(dict(cl_dct["labels_ethnicity"]).values()),
+             marker_color=px.colors.qualitative.G10,
+         )
+     )
+     ethnicity_description = describe_cluster(
+         dict(cl_dct["labels_ethnicity"]), "ethnicity"
+     )
+
+     return (
+         clusters_by_size[num_clusters][cl_id]["summary_desc"],
+         gender_fig,
+         gender_description,
+         model_fig,
+         model_description,
+         ethnicity_fig,
+         ethnicity_description,
+         images,
+         gr.update(choices=dropdown_descs[num_clusters]),
+         # gr.update(choices=[i for i in range(num_clusters)]),
+     )
+
+
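+ # Demo layout: intro text, a context accordion, the granularity selector, the cluster
+ # dropdown with its text summary and image gallery, and the three makeup plots.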
+ with gr.Blocks(title=TITLE) as demo:
+     gr.Markdown(_INTRO)
+     with gr.Accordion(
+         "How do diffusion-based models represent gender and ethnicity?", open=False
+     ):
+         gr.Markdown(_CONTEXT)
+     gr.HTML(
+         """<span style="color:red;font-size:smaller">⚠️ DISCLAIMER: the images displayed by this tool were generated by text-to-image systems and may depict offensive stereotypes or contain explicit content.</span>"""
+     )
+     num_clusters = gr.Radio(
+         [12, 24, 48],
+         value=12,
+         label="How many clusters do you want to make from the data?",
+     )
+
+     with gr.Row():
+         with gr.Column():
+             cluster_id = gr.Dropdown(
+                 choices=dropdown_descs[num_clusters.value],
+                 value=0,
+                 label="Select cluster to visualize:",
+             )
+             a = gr.Text(label="Cluster summary")
+         with gr.Column():
+             gallery = gr.Gallery(label="Most representative images in cluster").style(
+                 grid=[2, 4], height="auto"
+             )
+     with gr.Row():
+         with gr.Column():
+             c = gr.Plot(label="How many images from each system?")
+             c_desc = gr.HTML(label="")
+         with gr.Column(scale=1):
+             b = gr.Plot(label="Which gender terms are represented?")
+             b_desc = gr.HTML(label="")
+         with gr.Column(scale=2):
+             d = gr.Plot(label="Which ethnicity terms are present?")
+             d_desc = gr.HTML(label="")
+
+     gr.Markdown(
+         "### Plot Descriptions\n\n"
+         + " The **System makeup** plot (*left*) shows how many images in the cluster come from each of the TTI systems that we are comparing: Dall-E 2, Stable Diffusion v.1.4, and Stable Diffusion v.2.\n\n"
+         + " The **Gender term makeup** plot (*middle*) shows how many images were generated from prompts that used the phrases man, woman, non-binary person, and person (unmarked) to describe the figure's gender.\n\n"
+         + " The **Ethnicity label makeup** plot (*right*) shows how many images correspond to each of the 18 ethnicity descriptions used in the prompts. A blank value denotes unmarked ethnicity.\n\n"
+     )
+     demo.load(
+         fn=show_cluster,
+         inputs=[cluster_id, num_clusters],
+         outputs=[a, b, b_desc, c, c_desc, d, d_desc, gallery, cluster_id],
+     )
+     num_clusters.change(
+         fn=show_cluster,
+         inputs=[cluster_id, num_clusters],
+         outputs=[
+             a,
+             b,
+             b_desc,
+             c,
+             c_desc,
+             d,
+             d_desc,
+             gallery,
+             cluster_id,
+         ],
+     )
+     cluster_id.change(
+         fn=show_cluster,
+         inputs=[cluster_id, num_clusters],
+         outputs=[a, b, b_desc, c, c_desc, d, d_desc, gallery, cluster_id],
+     )
+
+ if __name__ == "__main__":
+     demo.queue().launch(debug=True)