Spaces:

society-ethics
/

DiffusionFaceClustering

Running

App Files Files Community

meg-branch

by meg HF staff - opened Feb 15, 2023

base: refs/heads/main

←

from: refs/pr/3

Discussion Files changed

+79

-39

Files changed (1) hide show

app.py +79 -39

app.py CHANGED Viewed

@@ -17,28 +17,39 @@ clusters_by_size = {
     48: clusters_48,
 }
 def to_string(label):
     if label == "SD_2":
-        label = "Stable Diffusion 2"
     elif label == "SD_14":
-        label = "Stable Diffusion 14"
     elif label == "DallE":
         label = "Dall-E 2"
     return label
 def describe_cluster(cl_dict, block="label"):
     labels_values = sorted(cl_dict.items(), key=operator.itemgetter(1))
     labels_values.reverse()
     total = float(sum(cl_dict.values()))
-    lv_prcnt = list((item[0], round(item[1] * 100/total, 0)) for item in labels_values)
     top_label = lv_prcnt[0][0]
-    description_string = "<span>The most represented %s is <b>%s</b>, making up about %d%% of the cluster.</span>" % (block, to_string(lv_prcnt[0][0]), lv_prcnt[0][1])
     description_string += "<p>This is followed by: "
     for lv in lv_prcnt[1:]:
         description_string += "<BR/><b>%s:</b> %d%%" % (to_string(lv[0]), lv[1])
     description_string += "</p>"
     return description_string
 def show_cluster(cl_id, num_clusters):
     if not cl_id:
         cl_id = 0
@@ -47,60 +58,89 @@ def show_cluster(cl_id, num_clusters):
     cl_dct = clusters_by_size[num_clusters][cl_id]
     images = []
     for i in range(6):
-        img_path = "/".join([st.replace("/", "") for st in cl_dct['img_path_list'][i].split("//")][3:])
-        images.append((Image.open(os.path.join("identities-images", img_path)), "_".join([img_path.split("/")[0], img_path.split("/")[-1]]).replace('Photo_portrait_of_an_','').replace('Photo_portrait_of_a_','').replace('SD_v2_random_seeds_identity_','(SD v.2) ').replace('dataset-identities-dalle2_','(Dall-E 2) ').replace('SD_v1.4_random_seeds_identity_','(SD v.1.4) ').replace('_',' ')))
     model_fig = go.Figure()
-    model_fig.add_trace(go.Pie(labels=list(dict(cl_dct["labels_model"]).keys()),
-                               values=list(dict(cl_dct["labels_model"]).values())))
     model_description = describe_cluster(dict(cl_dct["labels_model"]), "model")
     gender_fig = go.Figure()
-    gender_fig.add_trace(go.Pie(labels=list(dict(cl_dct["labels_gender"]).keys()),
-                                values=list(dict(cl_dct["labels_gender"]).values())))
-    gender_description = describe_cluster(dict(cl_dct["labels_gender"]), "gender")
     ethnicity_fig = go.Figure()
-    ethnicity_fig.add_trace(go.Bar(x=list(dict(cl_dct["labels_ethnicity"]).keys()),
-                                   y=list(dict(cl_dct["labels_ethnicity"]).values()),
-                                   marker_color=px.colors.qualitative.G10))
     return (len(cl_dct['img_path_list']),
-            gender_fig,gender_description,
             model_fig, model_description,
             ethnicity_fig,
             images,
-            gr.update(maximum=num_clusters-1))
 with gr.Blocks(title=TITLE) as demo:
     gr.Markdown(f"# {TITLE}")
-    gr.Markdown("## Explore the data generated from [DiffusionBiasExplorer](https://huggingface.co/spaces/society-ethics/DiffusionBiasExplorer)!")
-    gr.Markdown("### This demo showcases patterns in the images generated from different prompts input to Stable Diffusion and Dalle-2 diffusion models.")
-    gr.Markdown("### Below, see results on how the images from different prompts cluster together.")
-    gr.HTML("""<span style="color:red" font-size:smaller>⚠️ DISCLAIMER: the images displayed by this tool were generated by text-to-image models and may depict offensive stereotypes or contain explicit content.</span>""")
-    num_clusters = gr.Radio([12,24,48], value=12, label="How many clusters do you want to make from the data?")
     with gr.Row():
         with gr.Column(scale=4):
-            gallery = gr.Gallery(label="Most representative images in cluster").style(grid=(3,3))
         with gr.Column():
-            cluster_id = gr.Slider(minimum=0, maximum=num_clusters.value-1, step=1, value=0, label="Click to move between clusters")
             a = gr.Text(label="Number of images")
     with gr.Row():
-            with gr.Column(scale=1):
-                c = gr.Plot(label="How many images from each model?")
-                c_desc = gr.HTML(label="")
-            with gr.Column(scale=1):
-                b = gr.Plot(label="How many genders are represented?")
-                b_desc = gr.HTML(label="")
-            with gr.Column(scale=2):
-                d = gr.Plot(label="Which ethnicities are present?")
-    gr.Markdown(f"The 'Model makeup' plot corresponds to the number of images from the cluster that come from each of the TTI systems that we are comparing: Dall-E 2, Stable Diffusion v.1.4. and Stable Diffusion v.2.")
-    gr.Markdown('The Gender plot shows the number of images based on the input prompts that used the words man, woman, non-binary, and unmarked, which we label "person".')
-    gr.Markdown(f"The 'Ethnicity label makeup' plot corresponds to the number of images from each of the 18 ethnicities used in the prompts. A blank value means unmarked ethnicity.")
-    demo.load(fn=show_cluster, inputs=[cluster_id, num_clusters], outputs=[a, b, b_desc, c, c_desc, d, gallery, cluster_id])
-    num_clusters.change(fn=show_cluster, inputs=[cluster_id, num_clusters], outputs=[a, b, b_desc, c, c_desc, d, gallery, cluster_id])
-    cluster_id.change(fn=show_cluster, inputs=[cluster_id, num_clusters], outputs=[a, b, b_desc, c, c_desc, d, gallery, cluster_id])
 if __name__ == "__main__":
-    demo.queue().launch(debug=True)

     48: clusters_48,
 }
 def to_string(label):
     if label == "SD_2":
+        label = "Stable Diffusion 2.0"
     elif label == "SD_14":
+        label = "Stable Diffusion 1.4"
     elif label == "DallE":
         label = "Dall-E 2"
+    elif label == "non-binary":
+        label = "non-binary person"
+    elif label == "person":
+        label = "<i>unmarked</i> (person)"
+    elif label == "gender":
+        label = "gender term"
     return label
 def describe_cluster(cl_dict, block="label"):
     labels_values = sorted(cl_dict.items(), key=operator.itemgetter(1))
     labels_values.reverse()
     total = float(sum(cl_dict.values()))
+    lv_prcnt = list(
+        (item[0], round(item[1] * 100 / total, 0)) for item in labels_values)
     top_label = lv_prcnt[0][0]
+    description_string = "<span>The most represented %s is <b>%s</b>, making up about <b>%d%%</b> of the cluster.</span>" % (
+    to_string(block), to_string(top_label), lv_prcnt[0][1])
     description_string += "<p>This is followed by: "
     for lv in lv_prcnt[1:]:
         description_string += "<BR/><b>%s:</b> %d%%" % (to_string(lv[0]), lv[1])
     description_string += "</p>"
     return description_string
 def show_cluster(cl_id, num_clusters):
     if not cl_id:
         cl_id = 0
     cl_dct = clusters_by_size[num_clusters][cl_id]
     images = []
     for i in range(6):
+        img_path = "/".join([st.replace("/", "") for st in
+                             cl_dct['img_path_list'][i].split("//")][3:])
+        images.append((Image.open(os.path.join("identities-images", img_path)),
+                       "_".join([img_path.split("/")[0],
+                                 img_path.split("/")[-1]]).replace(
+                           'Photo_portrait_of_an_', '').replace(
+                           'Photo_portrait_of_a_', '').replace(
+                           'SD_v2_random_seeds_identity_', '(SD v.2) ').replace(
+                           'dataset-identities-dalle2_', '(Dall-E 2) ').replace(
+                           'SD_v1.4_random_seeds_identity_',
+                           '(SD v.1.4) ').replace('_', ' ')))
     model_fig = go.Figure()
+    model_fig.add_trace(go.Pie(labels=list(dict(cl_dct["labels_model"]).keys()),
+                               values=list(
+                                   dict(cl_dct["labels_model"]).values())))
     model_description = describe_cluster(dict(cl_dct["labels_model"]), "model")
     gender_fig = go.Figure()
+    gender_fig.add_trace(
+        go.Pie(labels=list(dict(cl_dct["labels_gender"]).keys()),
+               values=list(dict(cl_dct["labels_gender"]).values())))
+    gender_description = describe_cluster(dict(cl_dct["labels_gender"]),
+                                          "gender")
     ethnicity_fig = go.Figure()
+    ethnicity_fig.add_trace(
+        go.Bar(x=list(dict(cl_dct["labels_ethnicity"]).keys()),
+               y=list(dict(cl_dct["labels_ethnicity"]).values()),
+               marker_color=px.colors.qualitative.G10))
     return (len(cl_dct['img_path_list']),
+            gender_fig, gender_description,
             model_fig, model_description,
             ethnicity_fig,
             images,
+            gr.update(maximum=num_clusters - 1))
 with gr.Blocks(title=TITLE) as demo:
     gr.Markdown(f"# {TITLE}")
+    gr.Markdown(
+        "## Explore the data generated from [DiffusionBiasExplorer](https://huggingface.co/spaces/society-ethics/DiffusionBiasExplorer)!")
+    gr.Markdown(
+        "### This demo showcases patterns in the images generated from different prompts input to Stable Diffusion and Dalle-2 diffusion models.")
+    gr.Markdown(
+        "### Below, see results on how the images from different prompts cluster together.")
+    gr.HTML(
+        """<span style="color:red" font-size:smaller>⚠️ DISCLAIMER: the images displayed by this tool were generated by text-to-image models and may depict offensive stereotypes or contain explicit content.</span>""")
+    num_clusters = gr.Radio([12, 24, 48], value=12,
+                            label="How many clusters do you want to make from the data?")
     with gr.Row():
         with gr.Column(scale=4):
+            gallery = gr.Gallery(
+                label="Most representative images in cluster").style(
+                grid=(3, 3))
         with gr.Column():
+            cluster_id = gr.Slider(minimum=0, maximum=num_clusters.value - 1,
+                                   step=1, value=0,
+                                   label="Click to move between clusters")
             a = gr.Text(label="Number of images")
     with gr.Row():
+        with gr.Column(scale=1):
+            c = gr.Plot(label="How many images from each model?")
+            c_desc = gr.HTML(label="")
+        with gr.Column(scale=1):
+            b = gr.Plot(label="How many gender terms are represented?")
+            b_desc = gr.HTML(label="")
+        with gr.Column(scale=2):
+            d = gr.Plot(label="Which ethnicity terms are present?")
+    gr.Markdown(
+        f"The 'Model makeup' plot corresponds to the number of images from the cluster that come from each of the TTI systems that we are comparing: Dall-E 2, Stable Diffusion v.1.4. and Stable Diffusion v.2.")
+    gr.Markdown(
+        'The Gender plot shows the number of images based on the input prompts that used the words man, woman, non-binary person, and unmarked, which we label "person".')
+    gr.Markdown(
+        f"The 'Ethnicity label makeup' plot corresponds to the number of images from each of the 18 ethnicities used in the prompts. A blank value means unmarked ethnicity.")
+    demo.load(fn=show_cluster, inputs=[cluster_id, num_clusters],
+              outputs=[a, b, b_desc, c, c_desc, d, gallery, cluster_id])
+    num_clusters.change(fn=show_cluster, inputs=[cluster_id, num_clusters],
+                        outputs=[a, b, b_desc, c, c_desc, d, gallery,
+                                 cluster_id])
+    cluster_id.change(fn=show_cluster, inputs=[cluster_id, num_clusters],
+                      outputs=[a, b, b_desc, c, c_desc, d, gallery, cluster_id])
 if __name__ == "__main__":
+    demo.queue().launch(debug=True)