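"""Open Parti Prompts Leaderboard.

Aggregates the community answers collected by the Open Parti Prompts Space
(https://huggingface.co/spaces/OpenGenAI/open-parti-prompts) and displays the
overall, per-category, and per-challenge preference rates for each
open-source image generation model.
"""
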
from datasets import load_dataset
from collections import Counter, defaultdict
import pandas as pd
from huggingface_hub import list_datasets
import os
import gradio as gr
parti_prompt_results = []
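
# Models shown on the leaderboard, with links to their model cards. `ORG` hosts
# the per-model Parti Prompts datasets; the community answers are collected in
# the `SUBMISSION_ORG` defined below.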
ORG = "diffusers-parti-prompts"
SUBMISSIONS = {
    "sd-v1-5": None,
    "sd-v2-1": None,
    "if-v1-0": None,
    "karlo": None,
}
LINKS = {
    "sd-v1-5": "https://huggingface.co/runwayml/stable-diffusion-v1-5",
    "sd-v2-1": "https://huggingface.co/stabilityai/stable-diffusion-2-1",
    "if-v1-0": "https://huggingface.co/DeepFloyd/IF-I-XL-v1.0",
    "karlo": "https://huggingface.co/kakaobrain/karlo-v1-alpha",
}
MODEL_KEYS = "-".join(SUBMISSIONS.keys())
SUBMISSION_ORG = f"results-{MODEL_KEYS}"
submission_names = list(SUBMISSIONS.keys())
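
# Per-prompt "Category" and "Challenge" labels from Parti Prompts, indexed by
# prompt id; they are the same for every model, so we read them once.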
parti_prompt_metadata = load_dataset(os.path.join(ORG, "sd-v1-5"))["train"]
parti_prompt_categories = parti_prompt_metadata["Category"]
parti_prompt_challenge = parti_prompt_metadata["Challenge"]
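

# Tally the community votes: for every submission dataset in the results org,
# count how often each model was picked as best, overall as well as per
# challenge and per category of the underlying prompt.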
def load_submissions():
    all_datasets = list_datasets(author=SUBMISSION_ORG)
    relevant_ids = [d.id for d in all_datasets]

    ids = defaultdict(list)
    challenges = defaultdict(list)
    categories = defaultdict(list)
    for _id in relevant_ids:
        ds = load_dataset(_id)["train"]
        for result, image_id in zip(ds["result"], ds["id"]):
            ids[result].append(image_id)
            challenges[parti_prompt_challenge[image_id]].append(result)
            categories[parti_prompt_categories[image_id]].append(result)

    all_values = sum(len(v) for v in ids.values())
    main_dict = {k: '{:.2%}'.format(len(v) / all_values) for k, v in ids.items()}
    challenges = {k: Counter(v) for k, v in challenges.items()}
    categories = {k: Counter(v) for k, v in categories.items()}
    return main_dict, challenges, categories
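

# Order the model columns so that the model with the highest overall preference
# comes first; the per-category and per-challenge tables are reindexed to the
# same column order later.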
def sort_by_highest_percentage(df):
    # Convert the percentage strings (e.g. "25.00%") to numeric values
    for column in df.columns.to_list():
        df[column] = pd.to_numeric(df[column].str.rstrip('%'))

    # Put the column with the highest percentage first
    df = df[df.iloc[0].sort_values(ascending=False).index]

    # Convert back to percentage string format
    for column in df.columns.to_list():
        df[column] = df[column].map('{:.2f}%'.format)

    return df
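

# Build the three tables shown in the UI: overall preferences, a per-challenge
# breakdown, and a per-category breakdown, all formatted as percentages.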
def get_dataframe_all():
    main, challenges, categories = load_submissions()
    main_frame = pd.DataFrame([main])

    challenges_frame = pd.DataFrame.from_dict(challenges).fillna(0).T
    challenges_frame = challenges_frame.div(challenges_frame.sum(axis=1), axis=0)
    challenges_frame = challenges_frame.applymap(lambda x: '{:.2%}'.format(x))

    categories_frame = pd.DataFrame.from_dict(categories).fillna(0).T
    categories_frame = categories_frame.div(categories_frame.sum(axis=1), axis=0)
    categories_frame = categories_frame.applymap(lambda x: '{:.2%}'.format(x))

    categories_frame = categories_frame.reset_index().rename(columns={'index': 'Category'})
    challenges_frame = challenges_frame.reset_index().rename(columns={'index': 'Challenge'})

    main_frame = sort_by_highest_percentage(main_frame)

    # Keep the label column and align the model columns with the sorted order
    categories_frame = categories_frame.reindex(columns=["Category"] + main_frame.columns.to_list())
    challenges_frame = challenges_frame.reindex(columns=["Challenge"] + main_frame.columns.to_list())

    return main_frame, challenges_frame, categories_frame

TITLE = "# Open Parti Prompts Leaderboard"
DESCRIPTION = """
*This leaderboard is computed from the answers collected in [Community Evaluations on Parti Prompts](https://huggingface.co/spaces/OpenGenAI/open-parti-prompts).*
"""
EXPLANATION = """\n\n
## How is the data collected 📊 \n\n
In the [Community Parti Prompts](https://huggingface.co/spaces/OpenGenAI/open-parti-prompts) Space, community members select, for every prompt
of [Parti Prompts](https://huggingface.co/datasets/nateraw/parti-prompts), which open-source image generation model has generated the best image.
The community's answers are then stored and used in this Space as a human evaluation of the different models. \n\n
Currently, the leaderboard includes the following models:
- [sd-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5)
- [sd-v2-1](https://huggingface.co/stabilityai/stable-diffusion-2-1)
- [if-v1-0](https://huggingface.co/DeepFloyd/IF-I-XL-v1.0)
- [karlo](https://huggingface.co/kakaobrain/karlo-v1-alpha) \n\n
Below are three result tables. The first shows the overall preferences across all prompts; the second and third
break the results down per category and per type of challenge, as defined by [Parti Prompts](https://huggingface.co/datasets/nateraw/parti-prompts).
"""

GALLERY_COLUMN_NUM = len(SUBMISSIONS)
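

# Recompute all three tables; wired to the "Refresh" button below.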
def refresh():
    return get_dataframe_all()
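

# Gradio UI: intro text followed by the three result tables and a refresh button.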
with gr.Blocks() as demo:
    with gr.Column(visible=True) as intro_view:
        gr.Markdown(TITLE)
        gr.Markdown(DESCRIPTION)
        gr.Markdown(EXPLANATION)

    headers = list(SUBMISSIONS.keys())
    datatype = "str"
    main_df, challenge_df, category_df = get_dataframe_all()

    with gr.Column():
        gr.Markdown("# Open Parti Prompts")
        main_dataframe = gr.Dataframe(
            value=main_df,
            headers=main_df.columns.to_list(),
            datatype="str",
            row_count=main_df.shape[0],
            col_count=main_df.shape[1],
            interactive=False,
        )

    with gr.Column():
        gr.Markdown("## per category")
        cat_dataframe = gr.Dataframe(
            value=category_df,
            headers=category_df.columns.to_list(),
            datatype="str",
            row_count=category_df.shape[0],
            col_count=category_df.shape[1],
            interactive=False,
        )

    with gr.Column():
        gr.Markdown("## per challenge")
        chal_dataframe = gr.Dataframe(
            value=challenge_df,
            headers=challenge_df.columns.to_list(),
            datatype="str",
            row_count=challenge_df.shape[0],
            col_count=challenge_df.shape[1],
            interactive=False,
        )

    with gr.Row():
        refresh_button = gr.Button("Refresh")

    refresh_button.click(refresh, inputs=[], outputs=[main_dataframe, cat_dataframe, chal_dataframe])

demo.launch()