Spaces:

society-ethics
/

DiffusionClustering

Runtime error

File size: 5,218 Bytes

import gradio as gr
import numpy as np
import pandas as pd

# Make DataFrame.plot() render through plotly rather than the default backend.
pd.options.plotting.backend = "plotly"


TITLE = "Diffusion Faces Cluster Explorer"

# One JSON file of profession -> cluster assignments per clustering size.
clusters_12 = pd.read_json("clusters/professions_to_clusters_12.json")
clusters_24 = pd.read_json("clusters/professions_to_clusters_24.json")
clusters_48 = pd.read_json("clusters/professions_to_clusters_48.json")

# Lookup from the number of clusters chosen in the UI to the loaded data.
clusters_by_size = {12: clusters_12, 24: clusters_24, 48: clusters_48}

prompts = pd.read_csv("promptsadjectives.csv")
# NOTE: adjective handling is currently disabled; kept for reference.
# m_adjectives = prompts['Masc-adj'].tolist()[:10]
#  f_adjectives = prompts['Fem-adj'].tolist()[:10]
# adjectives = sorted(m_adjectives+f_adjectives)
# adjectives.insert(0, '')

# Lower-cased occupation names in alphabetical order, used as dropdown choices.
professions = sorted(
    occupation.lower() for occupation in prompts["Occupation-Noun"].tolist()
)

# Model key as used in the cluster data -> human-readable display name.
models = dict(
    All="All Models",
    SD_14="Stable Diffusion 1.4",
    SD_2="Stable Diffusion 2",
    DallE="Dall-E 2",
)


def make_profession_plot(num_clusters, prof_name):
    """Plot the per-model cluster proportions of one profession as grouped bars.

    Args:
        num_clusters: Key into ``clusters_by_size`` selecting which clustering
            to read (the UI offers 12, 24 and 48).
        prof_name: Profession whose cluster proportions are plotted.

    Returns:
        The figure produced by ``DataFrame.plot(kind="bar", barmode="group")``.
        The module sets the pandas plotting backend to plotly, and ``barmode``
        is forwarded to that backend.
    """
    # The chart shows exactly the data of the profession table, so build it
    # there instead of keeping a second copy of the same nested lookup.
    proportions = make_profession_table(num_clusters, prof_name)
    return proportions.plot(kind="bar", barmode="group")

def make_profession_table(num_clusters: int, prof_name: str) -> pd.DataFrame:
    """Build a table of cluster proportions per model for one profession.

    Args:
        num_clusters: Key into ``clusters_by_size`` selecting which clustering
            to read (the UI offers 12, 24 and 48).
        prof_name: Profession to look up.
            NOTE(review): the "Professions Overview" tab wires a multiselect
            dropdown (a list of names) into this parameter, while the lookup
            below indexes a single profession -- confirm the intended input.

    Returns:
        A DataFrame with one column per entry of ``models`` (labelled with the
        display name) and one row per cluster, labelled ``"Cluster <k>"``.
        Only clusters whose proportion under the "All" entry is greater than
        zero are included, ordered by that proportion from largest to smallest.

    Raises:
        KeyError: If ``num_clusters``, a model key, ``prof_name`` or a cluster
            key is missing from the loaded data.
    """
    by_model = clusters_by_size[num_clusters]

    # Row selection and order come from the aggregate "All" entry and are the
    # same for every model, so compute them once rather than once per model.
    overall = by_model["All"][prof_name]["cluster_proportions"]
    ranked_clusters = [
        cluster
        for cluster, share in sorted(
            overall.items(), key=lambda item: item[1], reverse=True
        )
        if share > 0
    ]

    pre_pandas = {}
    for mod_name in models:
        shares = by_model[mod_name][prof_name]["cluster_proportions"]
        pre_pandas[models[mod_name]] = {
            f"Cluster {cluster}": shares[cluster] for cluster in ranked_clusters
        }

    # Outer keys become columns (models), inner keys become the row index.
    return pd.DataFrame.from_dict(pre_pandas)


# Two-tab UI: an overview table across professions and a per-profession plot.
with gr.Blocks() as demo:
    gr.Markdown("# 🤗 Diffusion Cluster Explorer")
    gr.Markdown("description will go here")
    with gr.Tab("Professions Overview"):
        gr.Markdown("TODO")
        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("Select the parameters here:")
                num_clusters = gr.Radio(
                    [12, 24, 48],
                    value=12,
                    label="How many clusters do you want to use to represent identities?",
                )
                model_choices = gr.Dropdown(
                    list(models.values()),
                    # A multiselect dropdown holds a list of selected values.
                    value=["All Models"],
                    label="Which models do you want to compare?",
                    multiselect=True,
                    interactive=True,
                )
                profession_choices_1 = gr.Dropdown(
                    professions,
                    # `professions` is lower-cased, so the defaults must be
                    # lower-case too; drop any that the CSV does not contain.
                    value=[
                        default
                        for default in ("ceo", "social worker")
                        if default in professions
                    ],
                    label="Which professions do you want to compare?",
                    multiselect=True,
                    interactive=True,
                )
            with gr.Column(scale=3):
                gr.Markdown("")
                order = gr.Dropdown(
                    ["entropy", "cluster/sum of clusters"],
                    value="entropy",
                    label="Order rows by:",
                    interactive=True,
                )
                table = gr.DataFrame(
                    label="Profession assignment per cluster"
                )
                # NOTE(review): this passes the multiselect list of professions
                # to make_profession_table, which looks up a single profession;
                # `model_choices` and `order` are not wired to anything yet.
                profession_choices_1.change(
                    make_profession_table,
                    [num_clusters, profession_choices_1],
                    table,
                    queue=False,
                )

    # TODO: with gr.Accordion("Tag Frequencies", open=False):

    with gr.Tab("Profession Focus"):
        with gr.Row():
            # Rebinds `num_clusters`; the handler registered in the first tab
            # keeps its reference to that tab's own radio component.
            num_clusters = gr.Radio(
                [12, 24, 48],
                value=12,
                label="How many clusters do you want to use to represent identities?",
            )
        with gr.Row():
            with gr.Column():
                profession_choice = gr.Dropdown(
                    choices=professions, label="Select profession:"
                )
            with gr.Column():
                # The label is fixed when the UI is built, so it cannot name
                # the selected profession; interpolating the dropdown here
                # would only embed the component object's repr.
                plot = gr.Plot(
                    label="Makeup of the cluster assignments for the selected profession"
                )
                profession_choice.change(
                    make_profession_plot,
                    [num_clusters, profession_choice],
                    plot,
                    queue=False,
                )
        with gr.Row():
            gr.Markdown("TODO: show examplars for cluster")


demo.launch()