Spaces:

ZhangYuhan
/

3DGen-Arena

Running

App Files Files Community

ZhangYuhan committed on Jul 1, 2024

Commit

d75a844

1 Parent(s): ccbca0a

update leaderboard

Browse files

Files changed (11) hide show

app.py +12 -12
arena_elo/elo_rating/clean_battle_data.py +92 -95
arena_elo/elo_rating/elo_analysis.py +37 -19
arena_elo/elo_rating/generate_leaderboard.py +52 -32
arena_elo/results/latest/elo_results_image2shape.pkl +3 -0
arena_elo/results/latest/elo_results_text2shape.pkl +3 -0
arena_elo/results/latest/image2shape_leaderboard.csv +14 -0
arena_elo/results/latest/text2shape_leaderboard.csv +11 -0
model/model_registry.py +1 -1
serve/leaderboard.py +132 -77
serve/utils.py +1 -0

app.py CHANGED Viewed

@@ -26,7 +26,7 @@ def build_combine_demo(models, elo_results_file, leaderboard_table_file):
                         build_t2s_ui_single_model(models)
                     if elo_results_file:
                         with gr.Tab("Text-to-3D Leaderboard", id=3):
-                            build_leaderboard_tab(elo_results_file['t2s_generation'], leaderboard_table_file['t2s_generation'])
                     else:
                         with gr.Tab("Text-to-3D Leaderboard", id=3):
                             build_empty_leaderboard_tab()
@@ -43,7 +43,7 @@ def build_combine_demo(models, elo_results_file, leaderboard_table_file):
                         build_i2s_ui_single_model(models)
                     if elo_results_file:
                         with gr.Tab("Image-to-3D Leaderboard", id=8):
-                            build_leaderboard_tab(elo_results_file['i2s_generation'], leaderboard_table_file['i2s_generation'])
                     else:
                         with gr.Tab("Image-to-3D Leaderboard", id=8):
                             build_empty_leaderboard_tab()
@@ -62,17 +62,17 @@ def load_elo_results(elo_results_dir):
         elo_results_file = {}
         leaderboard_table_file = {}
         for file in elo_results_dir.glob('elo_results_*.pkl'):
-            if 't2s_generation' in file.name:
-                elo_results_file['t2s_generation'] = file
-            elif 'i2s_generation' in file.name:
-                elo_results_file['i2s_generation'] = file
             else:
                 raise ValueError(f"Unknown file name: {file.name}")
         for file in elo_results_dir.glob('*_leaderboard.csv'):
-            if 't2s_generation' in file.name:
-                leaderboard_table_file['t2s_generation'] = file
-            elif 'i2s_generation' in file.name:
-                leaderboard_table_file['i2s_generation'] = file
             else:
                 raise ValueError(f"Unknown file name: {file.name}")
@@ -84,7 +84,7 @@ if __name__ == "__main__":
     elo_results_dir = ELO_RESULTS_DIR
     models = ModelManager()
-    # elo_results_file, leaderboard_table_file = load_elo_results(elo_results_dir)
-    elo_results_file, leaderboard_table_file = None, None
     demo = build_combine_demo(models, elo_results_file, leaderboard_table_file)
     demo.queue(max_size=20).launch(server_port=server_port, root_path=ROOT_PATH, debug=True)

                         build_t2s_ui_single_model(models)
                     if elo_results_file:
                         with gr.Tab("Text-to-3D Leaderboard", id=3):
+                            build_leaderboard_tab(elo_results_file['text2shape'], leaderboard_table_file['text2shape'])
                     else:
                         with gr.Tab("Text-to-3D Leaderboard", id=3):
                             build_empty_leaderboard_tab()
                         build_i2s_ui_single_model(models)
                     if elo_results_file:
                         with gr.Tab("Image-to-3D Leaderboard", id=8):
+                            build_leaderboard_tab(elo_results_file['image2shape'], leaderboard_table_file['image2shape'])
                     else:
                         with gr.Tab("Image-to-3D Leaderboard", id=8):
                             build_empty_leaderboard_tab()
         elo_results_file = {}
         leaderboard_table_file = {}
         for file in elo_results_dir.glob('elo_results_*.pkl'):
+            if 'text2shape' in file.name:
+                elo_results_file['text2shape'] = file
+            elif 'image2shape' in file.name:
+                elo_results_file['image2shape'] = file
             else:
                 raise ValueError(f"Unknown file name: {file.name}")
         for file in elo_results_dir.glob('*_leaderboard.csv'):
+            if 'text2shape' in file.name:
+                leaderboard_table_file['text2shape'] = file
+            elif 'image2shape' in file.name:
+                leaderboard_table_file['image2shape'] = file
             else:
                 raise ValueError(f"Unknown file name: {file.name}")
     elo_results_dir = ELO_RESULTS_DIR
     models = ModelManager()
+    elo_results_file, leaderboard_table_file = load_elo_results(elo_results_dir)
+    # elo_results_file, leaderboard_table_file = None, None
     demo = build_combine_demo(models, elo_results_file, leaderboard_table_file)
     demo.queue(max_size=20).launch(server_port=server_port, root_path=ROOT_PATH, debug=True)

arena_elo/elo_rating/clean_battle_data.py CHANGED Viewed

@@ -21,42 +21,6 @@ from .basic_stats import get_log_files, NUM_SERVERS, LOG_ROOT_DIR
 from .utils import detect_language, get_time_stamp_from_date
 VOTES = ["tievote", "leftvote", "rightvote", "bothbad_vote"]
-IDENTITY_WORDS = [
-    "vicuna",
-    "lmsys",
-    "koala",
-    "uc berkeley",
-    "open assistant",
-    "laion",
-    "chatglm",
-    "chatgpt",
-    "gpt-4",
-    "openai",
-    "anthropic",
-    "claude",
-    "bard",
-    "palm",
-    "lamda",
-    "google",
-    "llama",
-    "qianwan",
-    "alibaba",
-    "mistral",
-    "zhipu",
-    "KEG lab",
-    "01.AI",
-    "AI2",
-    "Tülu",
-    "Tulu",
-    "NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.",
-    "$MODERATION$ YOUR INPUT VIOLATES OUR CONTENT MODERATION GUIDELINES.",
-    "API REQUEST ERROR. Please increase the number of max tokens.",
-    "**API REQUEST ERROR** Reason: The response was blocked.",
-    "**API REQUEST ERROR**",
-]
-for i in range(len(IDENTITY_WORDS)):
-    IDENTITY_WORDS[i] = IDENTITY_WORDS[i].lower()
 def remove_html(raw):
@@ -77,22 +41,28 @@ def to_openai_format(messages):
 def replace_model_name(old_name, tstamp):
     replace_dict = {
-        "bard": "palm-2",
-        "claude-v1": "claude-1",
-        "claude-instant-v1": "claude-instant-1",
-        "oasst-sft-1-pythia-12b": "oasst-pythia-12b",
-        "claude-2": "claude-2.0",
-        "PlayGroundV2": "Playground v2",
     }
-    if old_name in ["gpt-4", "gpt-3.5-turbo"]:
-        if tstamp > 1687849200:
-            return old_name + "-0613"
-        else:
-            return old_name + "-0314"
-    if old_name in replace_dict:
         return replace_dict[old_name]
     return old_name
 def read_file(filename):
     data = []
@@ -126,7 +96,7 @@ def load_image(image_path):
         return None
 def clean_battle_data(
-    log_files, exclude_model_names, ban_ip_list=None, sanitize_ip=False, mode="simple", task_name="t2s"
 ):
     data = read_file_parallel(log_files, num_threads=16)
@@ -139,6 +109,7 @@ def clean_battle_data(
     all_models = set()
     all_ips = dict()
     ct_anony = 0
     ct_invalid = 0
     ct_leaked_identity = 0
@@ -165,17 +136,18 @@ def clean_battle_data(
         ):
             ct_invalid += 1
             continue
-        if models_public[0] == "" or models_public[0] == "Model A":
-            anony = True
-            models = models_hidden
-            ct_anony += 1
         else:
-            anony = False
-            models = models_public
-            if not models_public == models_hidden:
-                ct_invalid += 1
-                continue
         # # Detect language
         # state = row["states"][0]
@@ -204,26 +176,37 @@ def clean_battle_data(
         #     continue
         # Replace bard with palm
-        if task_name == "image_editing":
-            if not all(x.startswith("imagenhub_") and x.endswith("_edition") for x in models):
-                # print(f"Invalid model names: {models}")
                 ct_invalid += 1
                 continue
-            models = [x[len("imagenhub_"):-len("_edition")] for x in models]
-        elif task_name == "t2i_generation":
-            if not all("playground" in x.lower() or (x.startswith("imagenhub_") and x.endswith("_generation")) for x in models):
-                # print(f"Invalid model names: {models}")
                 ct_invalid += 1
                 continue
-            # models = [x[len("imagenhub_"):-len("_generation")] for x in models]
-            for i, model_name in enumerate(models):
-                if model_name.startswith("imagenhub_"):
-                    models[i] = model_name[len("imagenhub_"):-len("_generation")]
         else:
             raise ValueError(f"Invalid task_name: {task_name}")
-        models = [replace_model_name(m, row["tstamp"]) for m in models]
         # Exclude certain models
         if exclude_model_names and any(x in exclude_model_names for x in models):
             ct_invalid += 1
@@ -237,30 +220,36 @@ def clean_battle_data(
         #     print(f"Invalid vote before the valid starting date for {models[0]} and {models[1]}")
         #     ct_invalid += 1
         #     continue
         if mode == "conv_release":
-            # assert the two images are the same
-            date = datetime.datetime.fromtimestamp(row["tstamp"], tz=timezone("US/Pacific")).strftime("%Y-%m-%d") # 2024-02-29
-            image_path_format = f"{LOG_ROOT_DIR}/{date}-convinput_images/input_image_"
-            image_path_0 = image_path_format + str(row["states"][0]["conv_id"]) + ".png"
-            image_path_1 = image_path_format + str(row["states"][1]["conv_id"]) + ".png"
-            if not os.path.exists(image_path_0) or not os.path.exists(image_path_1):
-                print(f"Image not found for {image_path_0} or {image_path_1}")
-                ct_invalid += 1
-                continue
-            image_0 = load_image(image_path_0)
-            image_1 = load_image(image_path_1)
-            if image_0 is None or image_1 is None:
-                print(f"Image not found for {image_path_0} or {image_path_1}")
-                ct_invalid += 1
-                continue
-            if image_0.tobytes() != image_1.tobytes():
-                print(f"Image not the same for {image_path_0} and {image_path_1}")
                 ct_invalid += 1
                 continue
         question_id = row["states"][0]["conv_id"]
@@ -284,24 +273,30 @@ def clean_battle_data(
             ct_banned += 1
             continue
         # Save the results
         battles.append(
             dict(
                 question_id=question_id,
                 model_a=models[0],
                 model_b=models[1],
                 winner=convert_type[row["type"]],
                 judge=f"arena_user_{user_id}",
                 # conversation_a=conversation_a,
                 # conversation_b=conversation_b,
-                # turn=len(conversation_a) // 2,
                 anony=anony,
                 # language=lang_code,
                 tstamp=row["tstamp"],
             )
         )
-        all_models.update(models_hidden)
     battles.sort(key=lambda x: x["tstamp"])
     last_updated_tstamp = battles[-1]["tstamp"]
@@ -316,6 +311,8 @@ def clean_battle_data(
     )
     print(f"#battles: {len(battles)}, #anony: {ct_anony}")
     print(f"#models: {len(all_models)}, {all_models}")
     print(f"last-updated: {last_updated_datetime}")
     if ban_ip_list is not None:
@@ -331,9 +328,9 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--max-num-files", type=int)
     parser.add_argument(
-        "--mode", type=str, choices=["simple", "conv_release"], default="simple"
     )
-    parser.add_argument("--task_name", type=str, choices=["t2s", "i2s"])
     parser.add_argument("--exclude-model-names", type=str, nargs="+")
     parser.add_argument("--ban-ip-file", type=str)
     parser.add_argument("--sanitize-ip", action="store_true", default=False)

 from .utils import detect_language, get_time_stamp_from_date
 VOTES = ["tievote", "leftvote", "rightvote", "bothbad_vote"]
 def remove_html(raw):
 def replace_model_name(old_name, tstamp):
     replace_dict = {
+        "point-e-t": "point-e",
+        "shap-e-t": "shap-e",
+        "point-e-i": "point-e",
+        "shap-e-i": "shap-e",
+        "point-e_t": "point-e",
+        "shap-e_t": "shap-e",
+        "point-e_i": "point-e",
+        "shap-e_i": "shap-e",
     }
+    if old_name in replace_dict.keys():
         return replace_dict[old_name]
     return old_name
+def replace_dim(dim_name):
+    replace_dict = {
+        "Geometry Quality": "Geometry Details",
+    }
+    if dim_name.endswith(": "):
+        dim_name = dim_name[:-2]
+    if dim_name in replace_dict.keys():
+        return replace_dict[dim_name]
+    return dim_name
 def read_file(filename):
     data = []
         return None
 def clean_battle_data(
+    log_files, exclude_model_names, ban_ip_list=None, sanitize_ip=False, mode="simple", task_name="text2shape"
 ):
     data = read_file_parallel(log_files, num_threads=16)
     all_models = set()
     all_ips = dict()
+    dim_counts = dict()
     ct_anony = 0
     ct_invalid = 0
     ct_leaked_identity = 0
         ):
             ct_invalid += 1
             continue
+        if not models_public == models_hidden:
+            ct_invalid += 1
+            continue
+        else:
+             models = models_hidden
+        if 'anony' not in row.keys():
+            ct_invalid += 1
+            continue
         else:
+            anony = row['anony']
         # # Detect language
         # state = row["states"][0]
         #     continue
         # Replace bard with palm
+        # if task_name == "image_editing":
+        #     if not all(x.startswith("imagenhub_") and x.endswith("_edition") for x in models):
+        #         # print(f"Invalid model names: {models}")
+        #         ct_invalid += 1
+        #         continue
+        #     models = [x[len("imagenhub_"):-len("_edition")] for x in models]
+        # elif task_name == "t2i_generation":
+        #     if not all("playground" in x.lower() or (x.startswith("imagenhub_") and x.endswith("_generation")) for x in models):
+        #         # print(f"Invalid model names: {models}")
+        #         ct_invalid += 1
+        #         continue
+        #     # models = [x[len("imagenhub_"):-len("_generation")] for x in models]
+        #     for i, model_name in enumerate(models):
+        #         if model_name.startswith("imagenhub_"):
+        #             models[i] = model_name[len("imagenhub_"):-len("_generation")]
+        if task_name == 'text2shape':
+            if row['states'][0]['i2s_mode'] or row['states'][1]['i2s_mode']:
                 ct_invalid += 1
                 continue
+        elif task_name == 'image2shape':
+            if not row['states'][0]['i2s_mode'] or not row['states'][1]['i2s_mode']:
                 ct_invalid += 1
                 continue
         else:
             raise ValueError(f"Invalid task_name: {task_name}")
+        models = [replace_model_name(m, row["tstamp"]) for m in models]
+        if anony:
+            ct_anony += 1
         # Exclude certain models
         if exclude_model_names and any(x in exclude_model_names for x in models):
             ct_invalid += 1
         #     print(f"Invalid vote before the valid starting date for {models[0]} and {models[1]}")
         #     ct_invalid += 1
         #     continue
         if mode == "conv_release":
+            if row['states'][0]['offline'] != row['states'][1]['offline']:
                 ct_invalid += 1
                 continue
+            elif row['states'][0]['offline']:
+                if row['states'][0]['offline_idx'] != row['states'][1]['offline_idx']:
+                    ct_invalid += 1
+                    continue
+            else:
+                # assert the two images are the same
+                date = datetime.datetime.fromtimestamp(row["tstamp"], tz=timezone("US/Pacific")).strftime("%Y-%m-%d") # 2024-02-29
+                image_path_format = f"{LOG_ROOT_DIR}/{date}-convinput_images/input_image_"
+                image_path_0 = image_path_format + str(row["states"][0]["conv_id"]) + ".png"
+                image_path_1 = image_path_format + str(row["states"][1]["conv_id"]) + ".png"
+                if not os.path.exists(image_path_0) or not os.path.exists(image_path_1):
+                    print(f"Image not found for {image_path_0} or {image_path_1}")
+                    ct_invalid += 1
+                    continue
+                image_0 = load_image(image_path_0)
+                image_1 = load_image(image_path_1)
+                if image_0 is None or image_1 is None:
+                    print(f"Image not found for {image_path_0} or {image_path_1}")
+                    ct_invalid += 1
+                    continue
+                if image_0.tobytes() != image_1.tobytes():
+                    print(f"Image not the same for {image_path_0} and {image_path_1}")
+                    ct_invalid += 1
+                    continue
         question_id = row["states"][0]["conv_id"]
             ct_banned += 1
             continue
+        dim = replace_dim(row['dim'])
+        if dim not in dim_counts.keys():
+            dim_counts[dim] = 0
+        dim_counts[dim] += 1
         # Save the results
         battles.append(
             dict(
                 question_id=question_id,
+                dim=dim,
                 model_a=models[0],
                 model_b=models[1],
                 winner=convert_type[row["type"]],
                 judge=f"arena_user_{user_id}",
                 # conversation_a=conversation_a,
                 # conversation_b=conversation_b,
+                idx=row['states'][0]['offline_idx'],
                 anony=anony,
                 # language=lang_code,
                 tstamp=row["tstamp"],
             )
         )
+        all_models.update(models)
     battles.sort(key=lambda x: x["tstamp"])
     last_updated_tstamp = battles[-1]["tstamp"]
     )
     print(f"#battles: {len(battles)}, #anony: {ct_anony}")
     print(f"#models: {len(all_models)}, {all_models}")
+    for dim, count in dim_counts.items():
+        print(dim, ": ", count)
     print(f"last-updated: {last_updated_datetime}")
     if ban_ip_list is not None:
     parser = argparse.ArgumentParser()
     parser.add_argument("--max-num-files", type=int)
     parser.add_argument(
+        "--mode", type=str, choices=["simple", "conv_release"], default="conv_release"
     )
+    parser.add_argument("--task_name", type=str, choices=["text2shape", "image2shape"])
     parser.add_argument("--exclude-model-names", type=str, nargs="+")
     parser.add_argument("--ban-ip-file", type=str)
     parser.add_argument("--sanitize-ip", action="store_true", default=False)

arena_elo/elo_rating/elo_analysis.py CHANGED Viewed

@@ -350,29 +350,47 @@ if __name__ == "__main__":
         log_files = get_log_files(args.max_num_files)
         battles = clean_battle_data(log_files)
-    anony_results = report_elo_analysis_results(
-        battles, rating_system=args.rating_system, num_bootstrap=args.num_bootstrap, anony_only=True
-    )
-    full_results = report_elo_analysis_results(
-        battles, rating_system=args.rating_system, num_bootstrap=args.num_bootstrap, anony_only=False
-    )
-    print("# Online Elo")
-    pretty_print_elo_rating(anony_results["elo_rating_online"])
-    print("# Median")
-    pretty_print_elo_rating(anony_results["elo_rating_final"])
-    print(f"last update : {anony_results['last_updated_datetime']}")
-    last_updated_tstamp = full_results["last_updated_tstamp"]
     cutoff_date = datetime.datetime.fromtimestamp(
         last_updated_tstamp, tz=timezone("US/Pacific")
     ).strftime("%Y%m%d")
-    results = {
-        "anony": anony_results,
-        "full": full_results,
-    }
     with open(f"elo_results_{cutoff_date}.pkl", "wb") as fout:
         pickle.dump(results, fout)

         log_files = get_log_files(args.max_num_files)
         battles = clean_battle_data(log_files)
+    ## split battles by evaluated dimensions
+    battles = pd.DataFrame(battles)
+    dims = list(battles['dim'].unique())
+    # dim_battles = {}
+    # for battle in battles:
+    #     print(battle)
+    #     if battle["dim"] not in dim_battles.keys():
+    #         dim_battles[battle.dim] = []
+    #     dim_battles[battle.dim].append(battle)
+    results = {}
+    last_updated_tstamp = None
+    for dim in dims:
+        print(dim)
+        dim_battles = battles[battles['dim']==dim].reset_index(drop=True)
+        print(dim_battles.shape)
+        anony_results = report_elo_analysis_results(
+            dim_battles, rating_system=args.rating_system, num_bootstrap=args.num_bootstrap, anony_only=True
+        )
+        full_results = report_elo_analysis_results(
+            dim_battles, rating_system=args.rating_system, num_bootstrap=args.num_bootstrap, anony_only=False
+        )
+        print(f"## {dim}")
+        print("# Online Elo")
+        pretty_print_elo_rating(anony_results["elo_rating_online"])
+        print("# Median")
+        pretty_print_elo_rating(anony_results["elo_rating_final"])
+        print(f"last update : {anony_results['last_updated_datetime']}")
+        results[dim] = {
+            "anony": anony_results,
+            "full": full_results,
+        }
+        if last_updated_tstamp is None or last_updated_tstamp < full_results["last_updated_tstamp"]:
+            last_updated_tstamp = full_results["last_updated_tstamp"]
     cutoff_date = datetime.datetime.fromtimestamp(
         last_updated_tstamp, tz=timezone("US/Pacific")
     ).strftime("%Y%m%d")
+    print(cutoff_date)
     with open(f"elo_results_{cutoff_date}.pkl", "wb") as fout:
         pickle.dump(results, fout)

arena_elo/elo_rating/generate_leaderboard.py CHANGED Viewed

@@ -14,43 +14,63 @@ def main(
     with open(elo_rating_pkl, "rb") as fin:
         elo_rating_results = pickle.load(fin)
-    anony_elo_rating_results = elo_rating_results["anony"]
-    full_elo_rating_results = elo_rating_results["full"]
-    anony_leaderboard_data = anony_elo_rating_results["leaderboard_table_df"]
-    full_leaderboard_data = full_elo_rating_results["leaderboard_table_df"]
-    # Model,MT-bench (score),Arena Elo rating,MMLU,License,Link
-    fields = ["key", "Model", "Arena Elo rating (anony)", "Arena Elo rating (full)", "License", "Organization", "Link"]
-    # set Organization and license to empty for now
-    all_models = anony_leaderboard_data.index.tolist()
-    for model in all_models:
-        if not model in model_info:
-            model_info[model] = {}
-            model_info[model]["License"] = "N/A"
-            model_info[model]["Organization"] = "N/A"
-            model_info[model]["Link"] = "N/A"
-        model_info[model]["Model"] = model
-        model_info[model]["key"] = model
-        if model in anony_leaderboard_data.index:
-            model_info[model]["Arena Elo rating (anony)"] = anony_leaderboard_data.loc[model, "rating"]
-        else:
-            model_info[model]["Arena Elo rating (anony)"] = 0
-        if model in full_elo_rating_results["leaderboard_table_df"].index:
-            model_info[model]["Arena Elo rating (full)"] = full_leaderboard_data.loc[model, "rating"]
-        else:
-            model_info[model]["Arena Elo rating (full)"] = 0
-        # if model in anony_leaderboard_data.index:
-        #     model_info[model]["Arena Elo rating"] = anony_leaderboard_data.loc[model, "rating"]
-        # else:
-        #     model_info[model]["Arena Elo rating"] = 0
     final_model_info = {}
-    for model in model_info:
-        if "Model" in model_info[model]:
-            final_model_info[model] = model_info[model]
     model_info = final_model_info
     exclude_keys = ['starting_from']
@@ -61,7 +81,7 @@ def main(
     df = pd.DataFrame(model_info).T
     df = df[fields]
     # sort by anony rating
-    df = df.sort_values(by=["Arena Elo rating (anony)"], ascending=False)
     df.to_csv(output_csv, index=False)
     print("Leaderboard data saved to", output_csv)
     print(df)

     with open(elo_rating_pkl, "rb") as fin:
         elo_rating_results = pickle.load(fin)
+    # Model, Dim Elo rating (anony), Arena Elo rating (anony), Link, Organization
+    model_ratings = model_info
+    fields = ["key", "Model"]
+    for dim, dim_results in elo_rating_results.items():
+        anony_elo_rating_results = dim_results["anony"]
+        full_elo_rating_results = dim_results["full"]
+        anony_leaderboard_data = anony_elo_rating_results["leaderboard_table_df"]
+        full_leaderboard_data = full_elo_rating_results["leaderboard_table_df"]
+        fields += [f"{dim} Elo rating"]
+        all_models = anony_leaderboard_data.index.tolist()
+        for model in all_models:
+            if not model in model_ratings:
+                # set Organization and license to empty
+                model_ratings[model] = {}
+                model_ratings[model]["Organization"] = "N/A"
+                model_ratings[model]["Link"] = "N/A"
+            model_ratings[model]["Model"] = model
+            model_ratings[model]["key"] = model
+            if model in anony_leaderboard_data.index:
+                model_ratings[model][f"{dim} Elo rating"] = anony_leaderboard_data.loc[model, "rating"]
+            else:
+                model_ratings[model][f"{dim} Elo rating"] = 0
+            if "Arena Elo rating" not in model_ratings[model].keys():
+                model_ratings[model]["Arena Elo rating"] = 0
+            model_ratings[model]["Arena Elo rating"] += model_ratings[model][f"{dim} Elo rating"]
+            ## Anony
+            # if model in anony_leaderboard_data.index:
+            #     model_ratings[model][f"{dim} Elo rating (anony)"] = anony_leaderboard_data.loc[model, "rating"]
+            # else:
+            #     model_ratings[model][f"{dim} Elo rating (anony)"] = 0
+            # if "Arena Elo rating (anony)" not in model_ratings[model].keys():
+            #     model_ratings[model]["Arena Elo rating (anony)"] = 0
+            # model_ratings[model]["Arena Elo rating (anony)"] += model_ratings[model][f"{dim} Elo rating (anony)"]
+            ## Anony + Named
+            # if model in full_elo_rating_results["leaderboard_table_df"].index:
+            #     model_ratings[model][f"{dim} Elo rating (full)"] = full_leaderboard_data.loc[model, "rating"]
+            # else:
+            #     model_ratings[model][f"{dim} Elo rating (full)"] = 0
+            # if "Arena Elo rating (full)" not in model_ratings[model].keys():
+            #     model_ratings[model]["Arena Elo rating (full)"] = 0
+            # model_ratings[model]["Arena Elo rating (full)"] += model_ratings[model][f"{dim} Elo rating (full)"]
+    fields += ["Arena Elo rating", "Link", "Organization"]
+    # fields += ["Arena Elo rating (anony)", "Arena Elo rating (full)", "Link", "Organization"]
     final_model_info = {}
+    print(model_ratings)
+    for model in model_ratings:
+        if "Model" in model_ratings[model]:
+            # model_ratings[model]["Arena Elo rating (anony)"] /= 5
+            # model_ratings[model]["Arena Elo rating (full)"] /= 5
+            model_ratings[model]["Arena Elo rating"] /= 5
+            final_model_info[model] = model_ratings[model]
     model_info = final_model_info
     exclude_keys = ['starting_from']
     df = pd.DataFrame(model_info).T
     df = df[fields]
     # sort by overall Arena Elo rating
+    df = df.sort_values(by=["Arena Elo rating"], ascending=False)
     df.to_csv(output_csv, index=False)
     print("Leaderboard data saved to", output_csv)
     print(df)

arena_elo/results/latest/elo_results_image2shape.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:763a67ed5648fc18f5143494c5df040e15d36239afcad12b560bd3bd7f3b15f2
+size 356525

arena_elo/results/latest/elo_results_text2shape.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3b0d5c169127ff56f994f911cdc9a291418082f998f8cc227bb8bc93fcac60e6
+size 303063

arena_elo/results/latest/image2shape_leaderboard.csv ADDED Viewed

	@@ -0,0 +1,14 @@

+key,Model,Geometry Plausibility Elo rating,Geometry Details Elo rating,Texture Quality Elo rating,Geometry-Texture Coherency Elo rating,Visual Alignment Elo rating,Arena Elo rating,Link,Organization
+wonder3d,wonder3d,1243.284839499005,1248.2975105106993,1167.837985855818,1320.3888541585839,1350.506240958834,1266.063086196588,N/A,N/A
+zero123-xl,zero123-xl,1194.649412893989,1101.0347850835524,1312.087224585339,1207.9352273497925,1144.1779276854743,1191.9769155196295,N/A,N/A
+openlrm,openlrm,1091.8760192981938,1222.0774978360885,1357.186686625133,1172.2322808524807,1113.8647248753261,1191.4474418974444,N/A,N/A
+magic123,magic123,1178.7199391336158,1029.8103015949425,1134.7674602557545,1301.8417174024141,1248.4622906482673,1178.720341806999,N/A,N/A
+grm-i,grm-i,1083.459465213645,1043.62495738426,1182.665735601177,1148.2931891751466,1434.9259362777323,1178.5938567303922,N/A,N/A
+stable-zero123,stable-zero123,1242.5508388592934,1196.2292237209613,1148.3376690300986,1180.2722658970024,1114.9239043945179,1176.4627803803746,N/A,N/A
+lgm,lgm,1057.916276030041,1106.0181413778544,1159.3104060792818,1106.1000119897903,1082.1591938968284,1102.3008058747594,N/A,N/A
+syncdreamer,syncdreamer,994.3065008728838,1090.5371113220137,876.5482674184123,889.0423446249837,849.5440886590599,939.9956625794706,N/A,N/A
+shap-e,shap-e,863.755371488366,865.6017926257314,891.563972695212,972.4063159954788,739.4720652007818,866.5599036011139,N/A,N/A
+triplane-gaussian,triplane-gaussian,850.8528602346569,889.7268326768269,800.0847617841707,725.8402704343466,1007.4240505628655,854.7857551385734,N/A,N/A
+point-e,point-e,816.3259708197892,777.9698792947121,834.9771690582178,859.8364726200334,740.3201250121207,805.8859233609746,N/A,N/A
+free3d,free3d,694.5518065271474,683.8285617090779,617.6756798090618,531.0802012842535,784.2006999191588,662.26738984974,N/A,N/A
+escher-net,escher-net,687.7506991293735,745.2434048632799,516.9569812023235,584.7308482156934,390.0187519090333,584.9401370639407,N/A,N/A

arena_elo/results/latest/text2shape_leaderboard.csv ADDED Viewed

	@@ -0,0 +1,11 @@

+key,Model,Geometry Plausibility Elo rating,Texture Quality Elo rating,Geometry Details Elo rating,Geometry-Texture Coherency Elo rating,Semantic Alignment Elo rating,Arena Elo rating,Link,Organization
+mvdream,mvdream,1246.0482236749672,1388.7547518674971,1284.500188530191,1311.3665264514373,1328.133497111749,1311.7606375271685,N/A,N/A
+lucid-dreamer,lucid-dreamer,1089.4897652983511,1262.0324465310641,1173.4213901828666,1182.4132799557342,1140.2117496688475,1169.5137263273725,N/A,N/A
+grm-t,grm-t,1065.2957236973393,938.5454826862575,1115.6433344459817,1019.5242102399678,1020.2764909535268,1031.8570484046147,N/A,N/A
+magic3d,magic3d,1012.6077627602834,1036.984799628633,1028.7772442112278,1063.4857834325169,999.9807438670646,1028.367266779945,N/A,N/A
+latent-nerf,latent-nerf,937.1268113750971,910.8947491420889,938.4922547668017,874.1294115476043,1021.3685731479346,936.4023599959053,N/A,N/A
+dreamfusion,dreamfusion,970.7944600712297,922.0644331004878,951.5799643764489,911.605820758788,843.9671829685316,920.0023722550972,N/A,N/A
+sjc,sjc,870.9792588602744,901.2344860951221,812.8106728066198,982.9416879375193,1004.6125410259175,914.5157293450906,N/A,N/A
+shap-e,shap-e,988.0167259180473,917.1927616589292,911.4422051186916,881.2592471160182,871.9730114545998,913.9767902532573,N/A,N/A
+point-e,point-e,819.6412683444105,722.29608928992,783.3327455611708,773.274032560414,769.4762098018289,773.6040691115488,N/A,N/A
+,,1000.0,,,,,200.0,N/A,N/A

model/model_registry.py CHANGED Viewed

@@ -184,7 +184,7 @@ register_model_info(
 )
 register_model_info(
-    ["stable-zero123", "zero123-xl"],
     "Stable Zero123",
     "https://stability.ai/news/stable-zero123-3d-generation",
     "Quality 3D Object Generation from Single Images",

 )
 register_model_info(
+    ["stable-zero123"],
     "Stable Zero123",
     "https://stability.ai/news/stable-zero123-3d-generation",
     "Quality 3D Object Generation from Single Images",

serve/leaderboard.py CHANGED Viewed

@@ -21,6 +21,39 @@ import pandas as pd
 basic_component_values = [None] * 6
 leader_component_values = [None] * 5
 # def make_leaderboard_md(elo_results):
 #     leaderboard_md = f"""
@@ -38,7 +71,7 @@ leader_component_values = [None] * 5
 def make_leaderboard_md(elo_results):
     leaderboard_md = f"""
-# 🏆 GenAI-Arena Leaderboard
 """
     return leaderboard_md
@@ -58,15 +91,11 @@ def model_hyperlink(model_name, link):
 def load_leaderboard_table_csv(filename, add_hyperlink=True):
     df = pd.read_csv(filename)
     for col in df.columns:
-        if "Arena Elo rating" in col:
-            df[col] = df[col].apply(lambda x: int(x) if x != "-" else np.nan)
-        elif col == "MMLU":
-            df[col] = df[col].apply(lambda x: round(x * 100, 1) if x != "-" else np.nan)
-        elif col == "MT-bench (win rate %)":
-            df[col] = df[col].apply(lambda x: round(x, 1) if x != "-" else np.nan)
-        elif col == "MT-bench (score)":
-            df[col] = df[col].apply(lambda x: round(x, 2) if x != "-" else np.nan)
         if add_hyperlink and col == "Model":
             df[col] = df.apply(lambda row: model_hyperlink(row[col], row["Link"]), axis=1)
@@ -125,45 +154,62 @@ def get_full_table(anony_arena_df, full_arena_df, model_table_df):
     return values
-def get_arena_table(arena_df, model_table_df):
     # sort by rating
-    arena_df = arena_df.sort_values(by=["rating"], ascending=False)
     values = []
-    for i in range(len(arena_df)):
         row = []
-        model_key = arena_df.index[i]
-        model_name = model_table_df[model_table_df["key"] == model_key]["Model"].values[
-            0
-        ]
         # rank
         row.append(i + 1)
         # model display name
-        row.append(model_name)
         # elo rating
-        row.append(round(arena_df.iloc[i]["rating"]))
-        upper_diff = round(arena_df.iloc[i]["rating_q975"] - arena_df.iloc[i]["rating"])
-        lower_diff = round(arena_df.iloc[i]["rating"] - arena_df.iloc[i]["rating_q025"])
-        row.append(f"+{upper_diff}/-{lower_diff}")
         # num battles
-        row.append(round(arena_df.iloc[i]["num_battles"]))
         # Organization
-        row.append(
-            model_table_df[model_table_df["key"] == model_key]["Organization"].values[0]
-        )
-        # license
-        row.append(
-            model_table_df[model_table_df["key"] == model_key]["License"].values[0]
-        )
         values.append(row)
     return values
 def make_arena_leaderboard_md(elo_results):
-    arena_df = elo_results["leaderboard_table_df"]
-    last_updated = elo_results["last_updated_datetime"]
-    total_votes = sum(arena_df["num_battles"]) // 2
-    total_models = len(arena_df)
     leaderboard_md = f"""
@@ -171,9 +217,8 @@ def make_arena_leaderboard_md(elo_results):
 Total #models: **{total_models}**(anonymous). Total #votes: **{total_votes}**. Last updated: {last_updated}.
 (Note: Only anonymous votes are considered here. Check the full leaderboard for all votes.)
-Contribute the votes 🗳️ at [GenAI-Arena](https://huggingface.co/spaces/TIGER-Lab/GenAI-Arena)!
-If you want to see more models, please help us [add them](https://github.com/TIGER-AI-Lab/ImagenHub?tab=readme-ov-file#-contributing-).
 """
     return leaderboard_md
@@ -205,14 +250,20 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
         with open(elo_results_file, "rb") as fin:
             elo_results = pickle.load(fin)
-        anony_elo_results = elo_results["anony"]
-        full_elo_results = elo_results["full"]
-        anony_arena_df = anony_elo_results["leaderboard_table_df"]
-        full_arena_df = full_elo_results["leaderboard_table_df"]
-        p1 = anony_elo_results["win_fraction_heatmap"]
-        p2 = anony_elo_results["battle_count_heatmap"]
-        p3 = anony_elo_results["bootstrap_elo_rating"]
-        p4 = anony_elo_results["average_win_rate_bar"]
         md = make_leaderboard_md(anony_elo_results)
@@ -222,54 +273,58 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
         model_table_df = load_leaderboard_table_csv(leaderboard_table_file)
         with gr.Tabs() as tabs:
             # arena table
-            arena_table_vals = get_arena_table(anony_arena_df, model_table_df)
             with gr.Tab("Arena Elo", id=0):
                 md = make_arena_leaderboard_md(anony_elo_results)
                 gr.Markdown(md, elem_id="leaderboard_markdown")
                 gr.Dataframe(
-                    headers=[
-                        "Rank",
-                        "🤖 Model",
-                        "⭐ Arena Elo",
-                        "📊 95% CI",
-                        "🗳️ Votes",
-                        "Organization",
-                        "License",
-                    ],
                     datatype=[
                         "str",
                         "markdown",
                         "number",
-                        "str",
                         "number",
-                        "str",
-                        "str",
                     ],
                     value=arena_table_vals,
                     elem_id="arena_leaderboard_dataframe",
                     height=700,
-                    column_widths=[50, 200, 100, 100, 100, 150, 150],
-                    wrap=True,
-                )
-            with gr.Tab("Full Leaderboard", id=1):
-                md = make_full_leaderboard_md(full_elo_results)
-                gr.Markdown(md, elem_id="leaderboard_markdown")
-                full_table_vals = get_full_table(anony_arena_df, full_arena_df, model_table_df)
-                gr.Dataframe(
-                    headers=[
-                        "🤖 Model",
-                        "⭐ Arena Elo (anony)",
-                        "⭐ Arena Elo (full)",
-                        "Organization",
-                        "License",
-                    ],
-                    datatype=["markdown", "number", "number", "str", "str"],
-                    value=full_table_vals,
-                    elem_id="full_leaderboard_dataframe",
-                    column_widths=[200, 100, 100, 100, 150, 150],
-                    height=700,
                     wrap=True,
                 )
         if not show_plot:
             gr.Markdown(
                 """ ## We are still collecting more votes on more models. The ranking will be updated very fruquently. Please stay tuned!
@@ -279,7 +334,7 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
     else:
         pass
-    leader_component_values[:] = [md, p1, p2, p3, p4]
     """
     with gr.Row():

 basic_component_values = [None] * 6
 leader_component_values = [None] * 5
# Maps the model keys used in the result tables to the display names shown on
# the leaderboard. Several keys may share a display name (e.g. the "-t" / "-i"
# variants of the same model).
nam_dict = {
    "dreamfusion": "DreamFusion",
    "mvdream": "MVDream",
    "lucid-dreamer": "LucidDreamer",
    "magic3d": "Magic3D",
    "grm-t": "GRM",
    "grm-i": "GRM",
    "grm": "GRM",
    "latent-nerf": "Latent-NeRF",
    "shap-e-t": "Shap-E",
    "shap-e-i": "Shap-E",
    "shap-e": "Shap-E",
    "point-e-t": "Point-E",
    "point-e-i": "Point-E",
    "point-e": "Point-E",
    "sjc": "SJC",
    "wonder3d": "Wonder3D",
    "openlrm": "OpenLRM",
    "sz123": "Stable Zero123",
    "stable-zero123": "Stable Zero123",
    "z123": "Zero123-XL",
    "zero123-xl": "Zero123-XL",
    "magic123": "Magic123",
    "lgm": "LGM",
    "syncdreamer": "SyncDreamer",
    "triplane-gaussian": "TriplaneGaussian",
    "escher-net": "EscherNet",
    "free3d": "Free3D",
}

# Medal prefix for the first three zero-based ranks; other ranks get none.
_RANK_MEDALS = {0: "🥇 ", 1: "🥈 ", 2: "🥉 "}


def replace_model_name(name, rank):
    """Return the display name for a model key, with a medal for the top 3.

    Args:
        name: Model key to look up in ``nam_dict``.
        rank: Zero-based position of the model; 0, 1 and 2 receive the
            gold, silver and bronze medal prefix respectively.

    Returns:
        The display name, prefixed with a medal emoji for ranks 0-2. A key
        that is not present in ``nam_dict`` is shown as-is instead of
        raising ``KeyError``, so a newly added model cannot break the table.
    """
    display_name = nam_dict.get(name, name)
    return _RANK_MEDALS.get(rank, "") + display_name
 # def make_leaderboard_md(elo_results):
 #     leaderboard_md = f"""
 def make_leaderboard_md(elo_results):
     leaderboard_md = f"""
+# 🏆 3DGen-Arena Leaderboard
 """
     return leaderboard_md
 def load_leaderboard_table_csv(filename, add_hyperlink=True):
     df = pd.read_csv(filename)
+    df = df.drop(df[df["key"].isnull()].index)
     for col in df.columns:
+        if "Elo rating" in col:
+            print(df[col])
+            df[col] = df[col].apply(lambda x: int(x) if (x != "-" and x != np.nan) else np.nan)
         if add_hyperlink and col == "Model":
             df[col] = df.apply(lambda row: model_hyperlink(row[col], row["Link"]), axis=1)
     return values
def get_arena_table(arena_dfs, model_table_df):
    """Build the row values for the arena leaderboard dataframe.

    Args:
        arena_dfs: Mapping from an evaluation dimension name to that
            dimension's Elo table. Each table is indexed by model key and is
            read for its ``rating`` and ``num_battles`` columns.
        model_table_df: Leaderboard table providing the ``key`` and
            ``Arena Elo rating`` columns. Rows are emitted in this table's
            order, so the rank is simply the row position
            (NOTE(review): presumably the CSV is already sorted by rating —
            confirm against the leaderboard generator).

    Returns:
        A list of rows, each laid out as
        ``[rank, display name, <rating per dimension...>, arena elo, votes]``.

    Raises:
        KeyError: If a model key from ``model_table_df`` is missing from one
            of the per-dimension tables.
    """
    values = []
    for position, (_, model_row) in enumerate(model_table_df.iterrows()):
        model_key = model_row["key"]
        # Rank is one-based; the medal lookup uses the zero-based position.
        row = [position + 1, replace_model_name(model_key, position)]

        num_battles = 0
        for dim_df in arena_dfs.values():
            # Look the model up once per dimension instead of once per field.
            stats = dim_df.loc[model_key]
            row.append(round(stats["rating"], 2))
            try:
                num_battles += round(stats["num_battles"])
            except (KeyError, TypeError, ValueError, OverflowError):
                # Missing or non-finite battle count: this dimension
                # contributes no votes rather than failing the whole table.
                pass

        row.append(round(model_row["Arena Elo rating"], 2))
        row.append(num_battles)
        values.append(row)
    return values
 def make_arena_leaderboard_md(elo_results):
+    total_votes = 0
+    for dim in elo_results.keys():
+        arena_df = elo_results[dim]["leaderboard_table_df"]
+        last_updated = elo_results[dim]["last_updated_datetime"]
+        total_votes += sum(arena_df["num_battles"]) // 2
+        total_models = len(arena_df)
     leaderboard_md = f"""
 Total #models: **{total_models}**(anonymous). Total #votes: **{total_votes}**. Last updated: {last_updated}.
 (Note: Only anonymous votes are considered here. Check the full leaderboard for all votes.)
+Contribute the votes 🗳️ at [3DGen-Arena](https://huggingface.co/spaces/ZhangYuhan/3DGen-Arena)!
 """
     return leaderboard_md
         with open(elo_results_file, "rb") as fin:
             elo_results = pickle.load(fin)
+        # print(elo_results)
+        # print(elo_results.keys())
+        anony_elo_results, full_elo_results = {}, {}
+        anony_arena_dfs, full_arena_dfs = {}, {}
+        p1, p2, p3, p4 = {}, {}, {}, {}
+        for dim in elo_results.keys():
+            anony_elo_results[dim] = elo_results[dim]["anony"]
+            full_elo_results[dim] = elo_results[dim]["full"]
+            anony_arena_dfs[dim] = anony_elo_results[dim]["leaderboard_table_df"]
+            full_arena_dfs[dim] = full_elo_results[dim]["leaderboard_table_df"]
+            p1[dim] = anony_elo_results[dim]["win_fraction_heatmap"]
+            p2[dim] = anony_elo_results[dim]["battle_count_heatmap"]
+            p3[dim] = anony_elo_results[dim]["bootstrap_elo_rating"]
+            p4[dim] = anony_elo_results[dim]["average_win_rate_bar"]
         md = make_leaderboard_md(anony_elo_results)
         model_table_df = load_leaderboard_table_csv(leaderboard_table_file)
         with gr.Tabs() as tabs:
             # arena table
+            arena_table_vals = get_arena_table(anony_arena_dfs, model_table_df)
             with gr.Tab("Arena Elo", id=0):
                 md = make_arena_leaderboard_md(anony_elo_results)
                 gr.Markdown(md, elem_id="leaderboard_markdown")
                 gr.Dataframe(
+                    # headers=[
+                    #     "Rank",
+                    #     "🤖 Model",
+                    #     "⭐ Arena Elo",
+                    #     "📊 95% CI",
+                    #     "🗳️ Votes",
+                    #     "Organization",
+                    #     "License",
+                    # ],
+                    headers=["Rank", "🤖 Model"] + [f"📈 {dim} Elo" for dim in anony_arena_dfs.keys()] + ["⭐ Avg. Arena Elo Ranking", "📮 Votes"],
                     datatype=[
                         "str",
                         "markdown",
                         "number",
                         "number",
+                        "number",
+                        "number",
+                        "number",
+                        "number",
+                        "number"
                     ],
                     value=arena_table_vals,
+                    # value=model_table_df,
                     elem_id="arena_leaderboard_dataframe",
                     height=700,
+                    column_widths=[50, 200, 100, 100, 100, 100, 100, 100, 100],
                     wrap=True,
                 )
+            # with gr.Tab("Full Leaderboard", id=1):
+            #     md = make_full_leaderboard_md(full_elo_results)
+            #     gr.Markdown(md, elem_id="leaderboard_markdown")
+            #     full_table_vals = get_full_table(anony_arena_df, full_arena_df, model_table_df)
+            #     gr.Dataframe(
+            #         headers=[
+            #             "🤖 Model",
+            #             "⭐ Arena Elo (anony)",
+            #             "⭐ Arena Elo (full)",
+            #             "Organization",
+            #             "License",
+            #         ],
+            #         datatype=["markdown", "number", "number", "str", "str"],
+            #         value=full_table_vals,
+            #         elem_id="full_leaderboard_dataframe",
+            #         column_widths=[200, 100, 100, 100, 150, 150],
+            #         height=700,
+            #         wrap=True,
+            #     )
         if not show_plot:
             gr.Markdown(
                 """ ## We are still collecting more votes on more models. The ranking will be updated very fruquently. Please stay tuned!
     else:
         pass
+    # leader_component_values[:] = [md, p1, p2, p3, p4]
     """
     with gr.Row():

serve/utils.py CHANGED Viewed

@@ -66,6 +66,7 @@ block_css = """
 }
 #leaderboard_dataframe td {
     line-height: 0.1em;
 }
 #about_markdown {
     font-size: 110%

 }
 #leaderboard_dataframe td {
     line-height: 0.1em;
+    font-weight: bold;
 }
 #about_markdown {
     font-size: 110%