File size: 5,218 Bytes
201ef5d
 
 
 
 
 
 
37b3ed3
44172c5
 
 
201ef5d
2fe028c
 
 
 
 
 
91c823d
 
 
 
 
 
 
 
 
 
 
 
 
2fe028c
17f86fd
91c823d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2fe028c
17f86fd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2fe028c
201ef5d
 
400023d
91c823d
37b3ed3
 
17f86fd
 
91c823d
 
 
 
 
17f86fd
 
 
2fe028c
91c823d
 
17f86fd
91c823d
 
 
17f86fd
 
 
 
 
 
 
 
 
201ef5d
91c823d
201ef5d
91c823d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17f86fd
91c823d
 
 
 
 
 
201ef5d
 
91c823d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import gradio as gr
import numpy as np
import pandas as pd

# Send DataFrame.plot(...) calls through plotly rather than the pandas default.
pd.options.plotting.backend = "plotly"


TITLE = "Diffusion Faces Cluster Explorer"

# One profession-to-cluster table per supported number of clusters.
clusters_12 = pd.read_json("clusters/professions_to_clusters_12.json")
clusters_24 = pd.read_json("clusters/professions_to_clusters_24.json")
clusters_48 = pd.read_json("clusters/professions_to_clusters_48.json")

# Lookup keyed by the cluster count offered in the UI radio buttons.
clusters_by_size = {12: clusters_12, 24: clusters_24, 48: clusters_48}

prompts = pd.read_csv("promptsadjectives.csv")
# Adjective prompts are currently disabled:
# m_adjectives = prompts['Masc-adj'].tolist()[:10]
# f_adjectives = prompts['Fem-adj'].tolist()[:10]
# adjectives = sorted(m_adjectives+f_adjectives)
# adjectives.insert(0, '')

# Lower-cased, alphabetically sorted occupation names used as dropdown choices.
professions = sorted(
    occupation.lower() for occupation in prompts["Occupation-Noun"].tolist()
)

# Column name in the cluster tables -> label displayed to the user.
models = dict(
    All="All Models",
    SD_14="Stable Diffusion 1.4",
    SD_2="Stable Diffusion 2",
    DallE="Dall-E 2",
)


def make_profession_plot(num_clusters, prof_name):
    """Plot the per-model cluster proportions of one profession as grouped bars.

    The plotted numbers are exactly the frame produced by
    ``make_profession_table`` for the same arguments (one column per model,
    one row per cluster that has a non-zero share in the "All" entry), so the
    chart and the table can never disagree.

    Args:
        num_clusters: Key into ``clusters_by_size`` selecting which clustering
            to read.
        prof_name: Name of the profession to look up in the cluster table.

    Returns:
        The object returned by ``DataFrame.plot`` for the active pandas
        plotting backend.

    Raises:
        KeyError: If ``num_clusters`` or ``prof_name`` is missing from the
            cluster data.
    """
    df = make_profession_table(num_clusters, prof_name)
    # NOTE: ``barmode`` is forwarded to the plotting backend; this presumably
    # relies on the plotly backend having been selected at module level.
    return df.plot(kind="bar", barmode="group")

def make_profession_table(num_clusters, prof_name, cluster_tables=None, model_labels=None):
    """Tabulate per-model cluster proportions for one or several professions.

    For a single profession the result has one column per model (labelled
    with the display names from ``model_labels``) and one row per cluster,
    labelled ``"Cluster <id>"``. Only clusters whose proportion under the
    ``"All"`` entry is greater than zero are kept, ordered by that proportion
    from largest to smallest.

    Args:
        num_clusters: Key selecting which clustering to read from
            ``cluster_tables``.
        prof_name: Either one profession name, or a list/tuple of names (the
            shape a multiselect dropdown delivers). For a list, the
            per-profession tables are stacked and the row index gains an
            outer level holding the profession name.
        cluster_tables: Optional mapping ``num_clusters -> table`` to read
            from; defaults to the module-level ``clusters_by_size``. The table
            must support ``table[model][profession]["cluster_proportions"]``.
        model_labels: Optional mapping ``model key -> display label``;
            defaults to the module-level ``models``.

    Returns:
        A ``pandas.DataFrame`` as described above. An empty list of
        professions yields an empty frame that still carries the model columns.

    Raises:
        KeyError: If ``num_clusters``, a model key or a profession name is
            missing from the cluster data.
    """
    tables = clusters_by_size if cluster_tables is None else cluster_tables
    labels = models if model_labels is None else model_labels

    # A multiselect dropdown passes a list of names; the single-profession
    # lookup below cannot index with a list, so handle each name separately.
    if isinstance(prof_name, (list, tuple)):
        if not prof_name:
            return pd.DataFrame(columns=list(labels.values()))
        frames = [
            make_profession_table(num_clusters, name, tables, labels)
            for name in prof_name
        ]
        return pd.concat(frames, keys=list(prof_name))

    by_model = tables[num_clusters]

    # The "All" entry decides both which clusters are shown and their order,
    # so every model column lists the same clusters in the same sequence.
    overall = by_model["All"][prof_name]["cluster_proportions"]
    ranked_clusters = [
        cluster_id
        for cluster_id, share in sorted(
            overall.items(), key=lambda item: item[1], reverse=True
        )
        if share > 0
    ]

    columns = {}
    for mod_name, label in labels.items():
        # Resolve the per-model proportions once instead of once per cluster.
        proportions = by_model[mod_name][prof_name]["cluster_proportions"]
        columns[label] = {
            f"Cluster {cluster_id}": proportions[cluster_id]
            for cluster_id in ranked_clusters
        }
    return pd.DataFrame.from_dict(columns)


# Gradio UI: two tabs, one comparing several professions in a table and one
# focusing on a single profession with a bar chart.
with gr.Blocks() as demo:
    gr.Markdown("# 🤗 Diffusion Cluster Explorer")
    gr.Markdown("description will go here")
    with gr.Tab("Professions Overview"):
        gr.Markdown("TODO")
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("Select the parameters here:")
                num_clusters = gr.Radio(
                    [12, 24, 48],
                    value=12,
                    label="How many clusters do you want to use to represent identities?",
                )
                # Not wired to any callback in this block yet. The default of
                # a multiselect dropdown is given as a list of selections.
                model_choices = gr.Dropdown(
                    list(models.values()),
                    value=["All Models"],
                    label="Which models do you want to compare?",
                    multiselect=True,
                    interactive=True,
                )
                # `professions` holds lower-cased names, so the defaults must
                # be lower-case as well to match one of the choices.
                profession_choices_1 = gr.Dropdown(
                    professions,
                    value=["ceo", "social worker"],
                    label="Which professions do you want to compare?",
                    multiselect=True,
                    interactive=True,
                )
            with gr.Column(scale=3):
                gr.Markdown("")
                # Not wired to any callback in this block yet.
                order = gr.Dropdown(
                    ["entropy", "cluster/sum of clusters"],
                    value="entropy",
                    label="Order rows by:",
                    interactive=True,
                )
                table = gr.DataFrame(
                    label="Profession assignment per cluster"
                )
                # The dropdown is multiselect, so the callback receives the
                # selected professions as a list.
                profession_choices_1.change(
                    make_profession_table,
                    [num_clusters, profession_choices_1],
                    table,
                    queue=False,
                )

    #        with gr.Accordion("Tag Frequencies", open=False):

    with gr.Tab("Profession Focus"):
        with gr.Row():
            # Rebinds `num_clusters`; the event registered in the first tab
            # already holds a reference to that tab's own radio component.
            num_clusters = gr.Radio(
                [12, 24, 48],
                value=12,
                label="How many clusters do you want to use to represent identities?",
            )
        with gr.Row():
            with gr.Column():
                profession_choice = gr.Dropdown(
                    choices=professions, label="Select profession:"
                )
            with gr.Column():
                # The label is fixed when the layout is built, so it cannot
                # name the selection; formatting the Dropdown component into
                # it would only embed the object's repr.
                plot = gr.Plot(
                    label="Makeup of the cluster assignments for the selected profession"
                )
                profession_choice.change(
                    make_profession_plot,
                    [num_clusters, profession_choice],
                    plot,
                    queue=False,
                )
        with gr.Row():
            gr.Markdown("TODO: show examplars for cluster")


demo.launch()