yjernite commited on
Commit
ac7fd1c
·
1 Parent(s): d11d534
Files changed (1) hide show
  1. app.py +166 -72
app.py CHANGED
@@ -34,6 +34,20 @@ _ID_CLUSTER_SCREEN_SHOTS = {
34
  15: ("cluster_15_of_24_woman_white.JPG", "Cluster 15 of 24"),
35
  }
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  def get_images(path):
39
  images = [Image.open(os.path.join(path, im)) for im in os.listdir(path)]
@@ -41,26 +55,45 @@ def get_images(path):
41
  return [(im, path) for im, path in zip(images, paths)]
42
 
43
 
 
 
44
  def show_id_images(cl_id_1, cl_id_2, cl_id_3):
45
  img_path_1, cluster_name_1 = _ID_CLUSTER_SCREEN_SHOTS[cl_id_1]
46
  img_path_2, cluster_name_2 = _ID_CLUSTER_SCREEN_SHOTS[cl_id_2]
47
  img_path_3, cluster_name_3 = _ID_CLUSTER_SCREEN_SHOTS[cl_id_3]
48
  return (
49
  gr.update(
50
- value=os.path.join(impath, img_path_1),
51
  label=f"Screenshot of the Identity Exploration tool for: {cluster_name_1}",
52
  ),
53
  gr.update(
54
- value=os.path.join(impath, img_path_2),
55
  label=f"Screenshot of the Identity Exploration tool for: {cluster_name_2}",
56
  ),
57
  gr.update(
58
- value=os.path.join(impath, img_path_3),
59
  label=f"Screenshot of the Identity Exploration tool for: {cluster_name_3}",
60
  ),
61
  )
62
 
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  with gr.Blocks() as demo:
65
  gr.Markdown(
66
  """
@@ -140,7 +173,6 @@ with gr.Blocks() as demo:
140
  to showcase the visual trends encoded in these clusters - as well as their relation to the social variables under consideration.
141
  """
142
  )
143
- impath = "images/identities"
144
  with gr.Row():
145
  with gr.Column(scale=1):
146
  gr.Markdown(
@@ -172,7 +204,7 @@ with gr.Blocks() as demo:
172
  show_label=False,
173
  )
174
  identity_screenshot_1 = gr.Image(
175
- value=os.path.join(impath, "cluster_2_of_24_latinx_woman.JPG"),
176
  label="Screenshot of the Identity Exploration tool for: Cluster 2 of 24",
177
  )
178
  with gr.Row():
@@ -184,7 +216,7 @@ with gr.Blocks() as demo:
184
  )
185
  identity_screenshot_2 = gr.Image(
186
  value=os.path.join(
187
- impath, "cluster_3_of_24_native_american_stereetotypical.JPG"
188
  ),
189
  label="Screenshot of the Identity Exploration tool for: Cluster 3 of 24",
190
  )
@@ -226,7 +258,7 @@ with gr.Blocks() as demo:
226
  The clusters with the most examples of prompts with unspecified gender and unspecified ethnicity terms are **clusters 5 and 19**,
227
  and both are also strongly associated with the words *man*, *White*, and *Caucasian*.
228
  This association holds across genders (as showcased by **cluster 15**, which has a majority of *woman* and *White* prompts along with unspecified ethnicity)
229
- and across ethnicities (comparing the proportions of unspecified genders in **clusters 0 and 6**: 18 % and 38% for the clusters with more *aoman* and more *man* respectively along with the *African American* phrases).
230
 
231
  This provides the beginning of an answer to our motivating question: since users rarely specify an explicit gender or ethnicity when using
232
  these systems to generate images of people, the high likelihood of defaulting to *Whiteness* and *masculinity* is likely to at least partially explain the observed lack of diversity in the outputs.
@@ -241,10 +273,19 @@ with gr.Blocks() as demo:
241
  )
242
  identity_screenshot_3 = gr.Image(
243
  value=os.path.join(
244
- impath, "cluster_19_of_24_unmarked_white_unmarked_man.JPG"
245
  ),
246
  label="Screenshot of the Identity Exploration tool for: Cluster 19 of 24",
247
  )
 
 
 
 
 
 
 
 
 
248
  for var in [id_cl_id_1, id_cl_id_2, id_cl_id_3]:
249
  var.change(
250
  show_id_images,
@@ -260,37 +301,76 @@ with gr.Blocks() as demo:
260
  """
261
  ### Quantifying Social Biases in Image Generations: Professions
262
 
263
- Machine Learning models encode and amplify biases that are represented in the data that they are trained on -
264
- this can include, for instance, stereotypes around the appearances of members of different professions.
265
- In our study, we prompted the 3 text-to-image models with texts pertaining to 150 different professions
266
- and analyzed the presence of different identity groups in the images generated. We found evidence of many societal stereotypes in the images generated,
267
- such as the fact that people in positions of power (e.g. director, CEO) are often White- and male-appearing,
268
- while the images generated for other professions are more diverse.
269
- Read more about our findings in the accordion below or directly via the [Diffusion Cluster Explorer](https://hf.co/spaces/society-ethics/DiffusionClustering) tool.
 
 
 
 
 
 
270
  """
271
  )
272
  with gr.Accordion("Quantifying Social Biases in Image Generations: Professions", open=False):
273
  gr.Markdown(
274
  """
275
  <br/>
276
- We also explore the correlations between the professions that use used in our prompts and the different identity clusters that we identified.
277
 
278
- Using the [Profession Bias Tool](https://hf.co/spaces/society-ethics/DiffusionClustering)
279
- we can see which clusters are most correlated with each profession and what identities are in these clusters.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
  """
281
  )
282
- impath = "images/bias"
283
  with gr.Row():
284
  with gr.Column(scale=1):
285
  gr.Markdown(
286
  """
287
  #### [Diversity and Representation across Models](https://hf.co/spaces/society-ethics/DiffusionClustering "you can cycle through screenshots of the tool in use on the right, or go straight to the interactive demo")
288
 
289
- Using the **[Profession Bias Tool](https://hf.co/spaces/society-ethics/DiffusionClustering)**,
290
- we can see that the top cluster for the CEO and director professions is **Cluster 4**:
 
 
 
291
 
292
- We can see that the most represented gender term is *man* (56% of the cluster) and *White* (29% of the cluster).
293
- This is consistent with common stereotypes regarding people in positions of power, who are predominantly male, according to the US Labor Bureau Statistics.
 
 
 
 
 
 
 
 
 
294
  """
295
  )
296
  with gr.Column(scale=1):
@@ -298,15 +378,17 @@ with gr.Blocks() as demo:
298
  choices=[
299
  "Results table: all models",
300
  "Results table: Stable Diffusion v1.4",
301
- "Results table: Stable Diffusion v2,",
302
  "Results table: Stable Diffusion Dall-E 2",
303
  "Comparison histogram: all professions",
 
 
304
  ],
305
  value="Results table: all models",
306
  show_label=False,
307
  )
308
  bias_screenshot_1 = gr.Image(
309
- value=os.path.join(impath, "cluster_assign_24_all.png"),
310
  label="Screenshot of the Profession Bias Tool | Results table: all models",
311
  )
312
  with gr.Row():
@@ -322,28 +404,28 @@ with gr.Blocks() as demo:
322
  show_label=False,
323
  )
324
  bias_screenshot_2 = gr.Image(
325
- value=os.path.join(impath, "cluster_assign_mental_health_24_all.png"),
326
  label="Screenshot of the Profession Bias Tool | Results table: mental health professions, all models",
327
  )
328
- mental_helth_examlpars = gr.Gallery(
329
  [
330
- (Image.open(os.path.join(impath, im)), name)
331
  for im, name in [
332
- ("social_assistant_2_of_24.png", "Generated images of 'social assistant' assigned to cluster 2 of 24"),
333
- ("social_assistant_5_of_24.png", "Generated images of 'social assistant' assigned to cluster 5 of 24"),
334
- ("social_assistant_15_of_24.png", "Generated images of 'social assistant' assigned to cluster 15 of 24"),
335
- ("social_assistant_19_of_24.png", "Generated images of 'social assistant' assigned to cluster 19 of 24"),
336
- ("social_assistant_0_of_24.png", "Generated images of 'social assistant' assigned to cluster 0 of 24"),
337
- ("social_worker_2_of_24.png", "Generated images of 'social worker' assigned to cluster 2 of 24"),
338
- ("social_worker_5_of_24.png", "Generated images of 'social worker' assigned to cluster 5 of 24"),
339
- ("social_worker_15_of_24.png", "Generated images of 'social worker' assigned to cluster 15 of 24"),
340
- ("social_worker_19_of_24.png", "Generated images of 'social worker' assigned to cluster 19 of 24"),
341
- ("social_worker_0_of_24.png", "Generated images of 'social worker' assigned to cluster 0 of 24"),
342
- ("psychologist_2_of_24.png", "Generated images of 'psychologists' assigned to cluster 2 of 24"),
343
- ("psychologist_5_of_24.png", "Generated images of 'psychologists' assigned to cluster 5 of 24"),
344
- ("psychologist_15_of_24.png", "Generated images of 'psychologists' assigned to cluster 15 of 24"),
345
- ("psychologist_19_of_24.png", "Generated images of 'psychologists' assigned to cluster 19 of 24"),
346
- ("psychologist_0_of_24.png", "Generated images of 'psychologists' assigned to cluster 0 of 24"),
347
  ]
348
  ],
349
  label="Example images generated by three text-to-image models (Dall-E 2, Stable Diffusion v1.4 and v.2)",
@@ -354,39 +436,51 @@ with gr.Blocks() as demo:
354
  """
355
  #### [Focused Comparison: Mental Health Professions](https://hf.co/spaces/society-ethics/DiffusionClustering "you can cycle through screenshots of the tool in use on the left and example images below, or go straight to the interactive demo")
356
 
357
- If we look at the cluster representation of professions such as social assistant and social worker,
358
- we can observe that the former is best represented by **Cluster 2**, whereas the latter has a more uniform representation across multiple clusters:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
 
360
- Cluster 2 is best represented by the gender term is *woman* (81%) as well as *Latinx* (19%).
361
- This gender proportion is exactly the same as the one provided by the United States Labor Bureau (which you can see in the table above), with 81% of social assistants identifying as women.
 
 
 
362
  """
363
  )
364
- if False:
365
- # with gr.Row():
366
- mental_helth_examlpars = gr.Gallery(
367
- [
368
- (Image.open(os.path.join(impath, im)), name)
369
- for im, name in [
370
- ("social_assistant_0_of_24.png", "Generated images of 'social assistant' assigned to cluster 0 of 24"),
371
- ("social_assistant_2_of_24.png", "Generated images of 'social assistant' assigned to cluster 2 of 24"),
372
- ("social_assistant_5_of_24.png", "Generated images of 'social assistant' assigned to cluster 5 of 24"),
373
- ("social_assistant_0_of_24.png", "Generated images of 'social assistant' assigned to cluster 0 of 24"),
374
- ("social_assistant_0_of_24.png", "Generated images of 'social assistant' assigned to cluster 0 of 24"),
375
- ("social_worker_0_of_24.png", "Generated images of 'social worker' assigned to cluster 0 of 24"),
376
- ("social_worker_2_of_24.png", "Generated images of 'social worker' assigned to cluster 2 of 24"),
377
- ("social_worker_5_of_24.png", "Generated images of 'social worker' assigned to cluster 5 of 24"),
378
- ("social_worker_0_of_24.png", "Generated images of 'social worker' assigned to cluster 0 of 24"),
379
- ("social_worker_0_of_24.png", "Generated images of 'social worker' assigned to cluster 0 of 24"),
380
- ("psychologist_0_of_24.png", "Generated images of 'psychologists' assigned to cluster 0 of 24"),
381
- ("psychologist_2_of_24.png", "Generated images of 'psychologists' assigned to cluster 2 of 24"),
382
- ("psychologist_5_of_24.png", "Generated images of 'psychologists' assigned to cluster 5 of 24"),
383
- ("psychologist_0_of_24.png", "Generated images of 'psychologists' assigned to cluster 0 of 24"),
384
- ("psychologist_0_of_24.png", "Generated images of 'psychologists' assigned to cluster 0 of 24"),
385
- ]
386
  ],
387
- label="Example images generated by three text-to-image models (Dall-E 2, Stable Diffusion v1.4 and v.2).",
388
- show_label=False,
389
- ).style(grid=[3, 5], height="auto")
 
 
 
 
 
 
 
390
 
391
  gr.Markdown(
392
  """
 
34
  15: ("cluster_15_of_24_woman_white.JPG", "Cluster 15 of 24"),
35
  }
36
 
37
+ _BIAS_STATS_SCREEN_SHOTS = {
38
+ "Results table: all models": "cluster_assign_24_all.png",
39
+ "Results table: Stable Diffusion v1.4": "cluster_assign_24_sd14.png",
40
+ "Results table: Stable Diffusion v2.": "cluster_assign_24_sd2.png",
41
+ "Results table: Stable Diffusion Dall-E 2": "cluster_assign_24_dalle.png",
42
+ "Comparison histogram: all professions": "all_profs_histo_24.png",
43
+ "CEO examplars: Cluster 5": "ceo_5_of_24.png",
44
+ "CEO examplars: Cluster 6": "ceo_6_of_24.png",
45
+ "Results table: mental health professions, all models": "cluster_assign_mental_health_24_all.png",
46
+ "Comparison histogram: psychologist": "psychologist_histo_24.png",
47
+ "Comparison histogram: social worker": "social_worker_histo_24.png",
48
+ "Comparison histogram: social assistant": "social_assistant_histo_24.png",
49
+ }
50
+
51
 
52
  def get_images(path):
53
  images = [Image.open(os.path.join(path, im)) for im in os.listdir(path)]
 
55
  return [(im, path) for im, path in zip(images, paths)]
56
 
57
 
58
+ impath_id = "images/identities"
59
+
60
  def show_id_images(cl_id_1, cl_id_2, cl_id_3):
61
  img_path_1, cluster_name_1 = _ID_CLUSTER_SCREEN_SHOTS[cl_id_1]
62
  img_path_2, cluster_name_2 = _ID_CLUSTER_SCREEN_SHOTS[cl_id_2]
63
  img_path_3, cluster_name_3 = _ID_CLUSTER_SCREEN_SHOTS[cl_id_3]
64
  return (
65
  gr.update(
66
+ value=os.path.join(impath_id, img_path_1),
67
  label=f"Screenshot of the Identity Exploration tool for: {cluster_name_1}",
68
  ),
69
  gr.update(
70
+ value=os.path.join(impath_id, img_path_2),
71
  label=f"Screenshot of the Identity Exploration tool for: {cluster_name_2}",
72
  ),
73
  gr.update(
74
+ value=os.path.join(impath_id, img_path_3),
75
  label=f"Screenshot of the Identity Exploration tool for: {cluster_name_3}",
76
  ),
77
  )
78
 
79
 
80
+ impath_bias = "images/bias"
81
+
82
+ def show_bias_images(screen_id_1, screen_id_2):
83
+ img_path_1 = _BIAS_STATS_SCREEN_SHOTS[screen_id_1]
84
+ img_path_2 = _BIAS_STATS_SCREEN_SHOTS[screen_id_2]
85
+ return (
86
+ gr.update(
87
+ value=os.path.join(impath_bias, img_path_1),
88
+ label=f"Screenshot of the Profession Bias Tool | {screen_id_1}",
89
+ ),
90
+ gr.update(
91
+ value=os.path.join(impath_bias, img_path_2),
92
+ label=f"Screenshot of the Profession Bias Tool | {screen_id_2}",
93
+ ),
94
+ )
95
+
96
+
97
  with gr.Blocks() as demo:
98
  gr.Markdown(
99
  """
 
173
  to showcase the visual trends encoded in these clusters - as well as their relation to the social variables under consideration.
174
  """
175
  )
 
176
  with gr.Row():
177
  with gr.Column(scale=1):
178
  gr.Markdown(
 
204
  show_label=False,
205
  )
206
  identity_screenshot_1 = gr.Image(
207
+ value=os.path.join(impath_id, "cluster_2_of_24_latinx_woman.JPG"),
208
  label="Screenshot of the Identity Exploration tool for: Cluster 2 of 24",
209
  )
210
  with gr.Row():
 
216
  )
217
  identity_screenshot_2 = gr.Image(
218
  value=os.path.join(
219
+ impath_id, "cluster_3_of_24_native_american_stereetotypical.JPG"
220
  ),
221
  label="Screenshot of the Identity Exploration tool for: Cluster 3 of 24",
222
  )
 
258
  The clusters with the most examples of prompts with unspecified gender and unspecified ethnicity terms are **clusters 5 and 19**,
259
  and both are also strongly associated with the words *man*, *White*, and *Caucasian*.
260
  This association holds across genders (as showcased by **cluster 15**, which has a majority of *woman* and *White* prompts along with unspecified ethnicity)
261
+ and across ethnicities (comparing the proportions of unspecified genders in **clusters 0 and 6**: 18 % and 38% for the clusters with more *woman* and more *man* respectively along with the *African American* phrases).
262
 
263
  This provides the beginning of an answer to our motivating question: since users rarely specify an explicit gender or ethnicity when using
264
  these systems to generate images of people, the high likelihood of defaulting to *Whiteness* and *masculinity* is likely to at least partially explain the observed lack of diversity in the outputs.
 
273
  )
274
  identity_screenshot_3 = gr.Image(
275
  value=os.path.join(
276
+ impath_id, "cluster_19_of_24_unmarked_white_unmarked_man.JPG"
277
  ),
278
  label="Screenshot of the Identity Exploration tool for: Cluster 19 of 24",
279
  )
280
+ demo.load(
281
+ show_id_images,
282
+ inputs=[id_cl_id_1, id_cl_id_2, id_cl_id_3],
283
+ outputs=[
284
+ identity_screenshot_1,
285
+ identity_screenshot_2,
286
+ identity_screenshot_3,
287
+ ],
288
+ )
289
  for var in [id_cl_id_1, id_cl_id_2, id_cl_id_3]:
290
  var.change(
291
  show_id_images,
 
301
  """
302
  ### Quantifying Social Biases in Image Generations: Professions
303
 
304
+ In the previous section, we provided a method to characterize how text-to-image systems
305
+ associate textual mentions of identity characteristics with visual features in their outputs.
306
+ Exploring the inherent representations learned through our method already gave us insights into some
307
+ of the bias dynamics embedded in the models that support these systems.
308
+ We take the analysis further in the present section by leveraging these representations to study **social biases tied to a particular application
309
+ setting:** specifically, we consider a setting where users generate pictures of people in professional settings, for example to use in place of
310
+ stock images in articles or websites.
311
+ To that end, we systematically generate a large variety of images for 150 professions for each of 3 systems,
312
+ and compare the distribution of the visual features identified in the [Identity Representation Demo](https://hf.co/spaces/society-ethics/DiffusionFaceClustering)
313
+ across all generations and across generations for each profession.
314
+ You can explore these results in detail in the companion [Profession Bias Tool](https://hf.co/spaces/society-ethics/DiffusionClustering) -
315
+ in particular, you can read about the resulting **diversity metric** for different models and professions,
316
+ as well as a study case for **comparing the representations of different mental health professions**, by expanding the accordion below:
317
  """
318
  )
319
  with gr.Accordion("Quantifying Social Biases in Image Generations: Professions", open=False):
320
  gr.Markdown(
321
  """
322
  <br/>
 
323
 
324
+ Machine Learning models encode and amplify biases that are represented in the data that they are trained on -
325
+ this can include, for instance, stereotypes around the demographic makeup of different professions.
326
+ When the models are used in image generation systems that then contribute to the distribution of web images through integration
327
+ in image edition software or stock imagery services, these representation biases can then feed into complex sociotechnical systems.
328
+ For example, previous work has shown that seeing less diverse image search engine results can negatively impact under-represented groups' sense of belonging in their job,
329
+ or reinforce implicit biases in hiring that then lead to further exclusion and in turn again to less diverse representation.
330
+
331
+ As previously mentioned, measuring the extent of those biases in model outputs is complicated by the lack of inherent social characteristics
332
+ for the synthetic people depicted in the generated images. In this section, we show how to leverage the [identity clusters](https://hf.co/spaces/society-ethics/DiffusionFaceClustering)
333
+ introduced in the previous section to address this difficulty.
334
+ For each of the 3 image generation systems, we generate a dataset of images corresponding to prompts of the format *"Photo portrait of a **(adjective)** **[profession]**"*.
335
+ The adjectives serve both as a source of additional variability and as the focus of their own analysis (see our paper).
336
+ We use a list of 20 adjectives and 150 professions from the [US Bureau of Labor Statistics (BLS)](https://www.bls.gov/cps/cpsaat11.htm).
337
+ We then assign each image to a cluster based on a dot product between the image embedding and cluster centroid.
338
+ This allows us to answer our motivating question about quantifying whether images depicting a certain profession are **more likely to look like** images
339
+ corresponding to prompts explicitly mentioning specific genders or ethnicities (for example, mostly *man* and mostly *White* for images in cluster 5)
340
+ without assigning an identity characteristic to an individual generation for *e.g.* a *"Photo portrait of a compassionate CEO"* prompt.
341
+
342
+ The results are presented in both tabs of the [Profession Bias Tool](https://hf.co/spaces/society-ethics/DiffusionClustering).
343
+ The **Professions Overview** tab lets users select a system (or *All Models*) and a subset of professions (or *all professions* together)
344
+ to print a table showing its distribution over the top 8 identity clusters, its diversity as measured by the entropy of this distribution,
345
+ and the gender ratio for this profession in the US as reported by the [BLS](https://www.bls.gov/cps/cpsaat11.htm "specifically, the reported proportion of women").
346
+ It also provides a summary description of the clusters for convenience.
347
+ The **Profession Focus** tab lets users select a single profession and compares the distribution across identity clusters for all image generation systems in a histogram.
348
+ It also provides examples of images generated for the profession that are assigned to each of the clusters.
349
  """
350
  )
 
351
  with gr.Row():
352
  with gr.Column(scale=1):
353
  gr.Markdown(
354
  """
355
  #### [Diversity and Representation across Models](https://hf.co/spaces/society-ethics/DiffusionClustering "you can cycle through screenshots of the tool in use on the right, or go straight to the interactive demo")
356
 
357
+ We start by looking at the summary statistics provided in the [**Professions Overview** tab](https://hf.co/spaces/society-ethics/DiffusionClustering "select screenshots right or go straight to the demo").
358
+ **Clusters 5 and 19**, which are mostly made up of images generated for prompts mentioning *man*, *White*, and *Caucasian*,
359
+ are the most represented across systems and professions, accounting for 53.6 % of generations.
360
+ This over-representation (less than 31% of respondents to the 2020 US census checked the boxes for *man* and *White*) is not equally distributed, however:
361
+ these two clusters make up 69.7% of the *CEO* images, but only 35.7 and 22.1% for *fast food worker* and *social worker* respectively.
362
 
363
+ Next, compare the tables for systems Stable Diffusion v1.4, v2, and Dall-E 2.
364
+ These same clusters are somewhat less over-represented in Stable Diffusion v1.4, accounting for 39.6% of generations across professions and 43.3% for *CEO*,
365
+ but Dall-E 2 shows the strongest disparities with 68.7 and 87.7 for all professions and *CEO* respectively.
366
+ Stable Diffusion v2 falls in the middle (52.4 all, 78.1 *CEO*).
367
+ The same ordering is reflected in the entropy-based diversity metric, with Stable Diffusion v1.4 measured as the most diverse (2.2)
368
+ followed by v2 (1.9), and Dall-E 2 (1.7).
369
+
370
+ These three systems correspond to different approaches and design choices, especially in terms of filtering of the pre-training data for Stable Diffusion v2 and Dall-E 2,
371
+ guided by concerns of performance (esthetic value of the images, relevance of the image to the prompt) and risks of generating unwanted content
372
+ (usually with a focus on sexual themes and violence). While these two aspects are undoubtedly important,
373
+ the results we present here suggest that much more care needs to be taken in ensuring that these interventions do not exacerbate social dynamics that further erase marginalized populations.
374
  """
375
  )
376
  with gr.Column(scale=1):
 
378
  choices=[
379
  "Results table: all models",
380
  "Results table: Stable Diffusion v1.4",
381
+ "Results table: Stable Diffusion v2.",
382
  "Results table: Stable Diffusion Dall-E 2",
383
  "Comparison histogram: all professions",
384
+ "CEO examplars: Cluster 5",
385
+ "CEO examplars: Cluster 6",
386
  ],
387
  value="Results table: all models",
388
  show_label=False,
389
  )
390
  bias_screenshot_1 = gr.Image(
391
+ value=os.path.join(impath_bias, "cluster_assign_24_all.png"),
392
  label="Screenshot of the Profession Bias Tool | Results table: all models",
393
  )
394
  with gr.Row():
 
404
  show_label=False,
405
  )
406
  bias_screenshot_2 = gr.Image(
407
+ value=os.path.join(impath_bias, "cluster_assign_mental_health_24_all.png"),
408
  label="Screenshot of the Profession Bias Tool | Results table: mental health professions, all models",
409
  )
410
+ mental_health_examlpars = gr.Gallery(
411
  [
412
+ (Image.open(os.path.join(impath_bias, im)), name)
413
  for im, name in [
414
+ ("psychologist_2_of_24.png", "2 - psychologists"),
415
+ ("psychologist_5_of_24.png", "5 - psychologists"),
416
+ ("psychologist_15_of_24.png", "15 - psychologists"),
417
+ ("psychologist_19_of_24.png", "19 - psychologists"),
418
+ ("psychologist_0_of_24.png", "0 - psychologists"),
419
+ ("social_assistant_2_of_24.png", "2 - social assistant"),
420
+ ("social_assistant_5_of_24.png", "5 - social assistant"),
421
+ ("social_assistant_15_0f_24.png", "15 - social assistant"),
422
+ ("social_assistant_19_of_24.png", "19 - social assistant"),
423
+ ("social_assistant_0_of_24.png", "0 - social assistant"),
424
+ ("social_worker_2_of_24.png", "2 - social worker"),
425
+ ("social_worker_5_of_24.png", "5 - social worker"),
426
+ ("social_worker_15_of_24.png", "15 - social worker"),
427
+ ("social_worker_19_of_24.png", "19 - social worker"),
428
+ ("social_worker_0_of_24.png", "0 - social worker"),
429
  ]
430
  ],
431
  label="Example images generated by three text-to-image models (Dall-E 2, Stable Diffusion v1.4 and v.2)",
 
436
  """
437
  #### [Focused Comparison: Mental Health Professions](https://hf.co/spaces/society-ethics/DiffusionClustering "you can cycle through screenshots of the tool in use on the left and example images below, or go straight to the interactive demo")
438
 
439
+ We can also leverage the [Profession Bias Tool](https://hf.co/spaces/society-ethics/DiffusionClustering) for more focused analysis.
440
+ For this case study, we use it to compare the distribution over identity clusters for three professions related to mental health and care:
441
+ [social assistant](https://www.bls.gov/ooh/community-and-social-service/social-and-human-service-assistants.htm),
442
+ [social worker](https://www.bls.gov/ooh/community-and-social-service/social-workers.htm), and [psychologist](https://www.bls.gov/ooh/life-physical-and-social-science/psychologists.htm).
443
+ We can see that the BLS reports a significant majority of women for all three professions, from 74.9 to 83.6 percent of the workforce.
444
+ However, the BLS also reports different degree requirements and significant differences in median income,
445
+ with *social assistants* at **18$/hour**, *social workers* at **24$/hour**, and *psychologists* at **39$/hour**.
446
+ Going back to the proportions of clusters 5 and 19, we find that this corresponds to clusters associated with the terms *White* and *man*
447
+ accounting for **10.5%**, **22.1%**, and **49.8%** of generated images respectively for the prompts mentioning all three professions.
448
+ These findings underline the urgency of also **including socioeconomic factors in fairness evaluations.**
449
+
450
+ The **Profession Focus** tab provides further information to help interpret these results.
451
+ First, the comparative histograms (screenshots to the left) show how the trends hold across models.
452
+ Generations from all models follow the ordering outlined above, although we do see differences in the distribution for the *social worker* prompts
453
+ between the Stable Diffusion v.2. and Dall-E 2 systems.
454
+ Second, we show examplars of image generations for each profession that are assigned to each of the identity clusters (screenshot to the left, below).
455
+ This serves the dual purpose of confirming our intuitions about the visual features encoded in the cluster assignments and of making it easier to find examples
456
+ of profession image generations that are likely to showcase specific features.
457
+ For example, we see in the table that **cluster 0**, which is made up primarily of prompts featuring the phrases *woman* and *African American*,
458
+ is better represented for profession images generated for *social worker* prompts - the tool lets us visually inspect the images for this profession assigned to that cluster.
459
 
460
+ ***Note on the cluster examplar selection:*** In order to also show the limitations of the cluster assignment, we show the three images that are
461
+ closest to the cluster centroid, two close to the median distance, and finally the three images that are assigned to the cluster with the least confidence.
462
+ This shows atypical generations often with text printed (or something that looks like text), black and white pictures, pictures with multiple faces, and, in one case, a dog.
463
+ Given the stochastic nature of the generation, finding surprising generations across the 90,000+ images we generated is not wholly unexpected.
464
+ We provide more details on how we identified outliers across the image generations in the section entitled **Exploring the Pixel Space of Generated Images**.
465
  """
466
  )
467
+ demo.load(
468
+ show_bias_images,
469
+ inputs=[bias_cl_id_1, bias_cl_id_2],
470
+ outputs=[
471
+ bias_screenshot_1,
472
+ bias_screenshot_2,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
473
  ],
474
+ )
475
+ for var in [bias_cl_id_1, bias_cl_id_2]:
476
+ var.change(
477
+ show_bias_images,
478
+ inputs=[bias_cl_id_1, bias_cl_id_2],
479
+ outputs=[
480
+ bias_screenshot_1,
481
+ bias_screenshot_2,
482
+ ],
483
+ )
484
 
485
  gr.Markdown(
486
  """