Spaces:
Running
on
Zero
Running
on
Zero
DongfuJiang
committed on
Commit
•
e70b763
1
Parent(s):
0334436
add filter that models with minimum 50 votes can be on the leaderboard
Browse files
- arena_elo/elo_rating/elo_analysis.py +21 -1
- arena_elo/results/20240808/elo_results_t2i_generation.pkl +2 -2
- arena_elo/results/20240808/t2i_generation_leaderboard.csv +13 -17
- arena_elo/results/latest/elo_results_t2i_generation.pkl +2 -2
- arena_elo/results/latest/t2i_generation_leaderboard.csv +13 -17
- serve/leaderboard.py +1 -0
arena_elo/elo_rating/elo_analysis.py
CHANGED
@@ -381,6 +381,7 @@ if __name__ == "__main__":
|
|
381 |
"--rating-system", type=str, choices=["bt", "elo"], default="bt"
|
382 |
)
|
383 |
parser.add_argument("--exclude-tie", action="store_true", default=False)
|
|
|
384 |
args = parser.parse_args()
|
385 |
|
386 |
np.random.seed(42)
|
@@ -392,7 +393,26 @@ if __name__ == "__main__":
|
|
392 |
# Read data from all log files
|
393 |
log_files = get_log_files(args.max_num_files)
|
394 |
battles = clean_battle_data(log_files)
|
395 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
396 |
anony_results = report_elo_analysis_results(
|
397 |
battles, rating_system=args.rating_system, num_bootstrap=args.num_bootstrap, anony_only=True
|
398 |
)
|
|
|
381 |
"--rating-system", type=str, choices=["bt", "elo"], default="bt"
|
382 |
)
|
383 |
parser.add_argument("--exclude-tie", action="store_true", default=False)
|
384 |
+
parser.add_argument("--min_num_battles_per_model", type=int, default=50)
|
385 |
args = parser.parse_args()
|
386 |
|
387 |
np.random.seed(42)
|
|
|
393 |
# Read data from all log files
|
394 |
log_files = get_log_files(args.max_num_files)
|
395 |
battles = clean_battle_data(log_files)
|
396 |
+
|
397 |
+
if args.min_num_battles_per_model:
|
398 |
+
num_battles_per_model = defaultdict(int)
|
399 |
+
# use pd
|
400 |
+
for _, battle in battles.iterrows():
|
401 |
+
num_battles_per_model[battle["model_a"]] += 1
|
402 |
+
num_battles_per_model[battle["model_b"]] += 1
|
403 |
+
to_remove_models = [
|
404 |
+
model for model, num_battles in num_battles_per_model.items() if num_battles < args.min_num_battles_per_model
|
405 |
+
]
|
406 |
+
battles_with_enough_battles = battles[
|
407 |
+
~battles["model_a"].isin(to_remove_models) & ~battles["model_b"].isin(to_remove_models)
|
408 |
+
]
|
409 |
+
# battles_with_enough_battles = [
|
410 |
+
# battle for battle in battles if battle["model_a"] not in to_remove_models and battle["model_b"] not in to_remove_models
|
411 |
+
# ]
|
412 |
+
print(f"Remove models with less than {args.min_num_battles_per_model} battles: {to_remove_models}")
|
413 |
+
print(f"Number of battles: {len(battles)} -> {len(battles_with_enough_battles)}")
|
414 |
+
battles = battles_with_enough_battles
|
415 |
+
|
416 |
anony_results = report_elo_analysis_results(
|
417 |
battles, rating_system=args.rating_system, num_bootstrap=args.num_bootstrap, anony_only=True
|
418 |
)
|
arena_elo/results/20240808/elo_results_t2i_generation.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4cd4df0f34c643b04a068278e6fb02cf6d534a614c76ca1ef0936462de4cf561
|
3 |
+
size 73808
|
arena_elo/results/20240808/t2i_generation_leaderboard.csv
CHANGED
@@ -1,18 +1,14 @@
|
|
1 |
key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
LCM(v1.5/XL),LCM(v1.5/XL),912.4100279149377,859.7939054983376,openrail++,Latent Consistency,https://fal.ai/models/fal-ai/fast-lcm-diffusion/api
|
16 |
-
OpenJourney,OpenJourney,835.7176337894391,782.8288973266096,creativeml-openrail-m,PromptHero,https://huggingface.co/prompthero/openjourney
|
17 |
-
LCM,LCM,796.5741829165427,762.9098656374077,MIT License,Tsinghua University,https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7
|
18 |
-
Kolors,Kolors,716.6288722062632,792.3328249865597,N/A,N/A,N/A
|
|
|
1 |
key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
|
2 |
+
PlayGround V2.5,PlayGround V2.5,1150.727295281438,1152.9003848922068,Playground v2.5 Community License,Playground,https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic
|
3 |
+
PlayGround V2,PlayGround V2,1091.4598155811693,1090.650200464496,Playground v2 Community License,Playground,https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic
|
4 |
+
HunyuanDiT,HunyuanDiT,1068.3879352088109,1052.2174359779524,tencent-hunyuan-community,Tencent,https://huggingface.co/Tencent-Hunyuan/HunyuanDiT
|
5 |
+
StableCascade,StableCascade,1054.7995676029338,1059.1108432213439,stable-cascade-nc-community (other),Stability AI,https://huggingface.co/stabilityai/stable-cascade
|
6 |
+
SDXLLightning,SDXLLightning,1041.640169661881,1046.7852182436861,openrail++,ByteDance,https://huggingface.co/ByteDance/SDXL-Lightning
|
7 |
+
PixArtAlpha,PixArtAlpha,1039.5277933053214,1030.1441243333597,openrail++,PixArt-alpha,https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS
|
8 |
+
PixArtSigma,PixArtSigma,1037.9310468633396,1038.2525312486875,openrail++,PixArt-alpha,https://fal.ai/models/fal-ai/pixart-sigma
|
9 |
+
SD3,SD3,1022.5155081381295,1016.3178851896334,stabilityai-nc-research-community,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-3-medium
|
10 |
+
SDXL,SDXL,985.4596811879688,986.0371048218692,openrail++,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
|
11 |
+
SDXLTurbo,SDXLTurbo,928.5870205746824,926.7372919852419,sai-nc-community (other),Stability AI,https://huggingface.co/stabilityai/sdxl-turbo
|
12 |
+
LCM(v1.5/XL),LCM(v1.5/XL),924.0677575907189,918.1692261335113,openrail++,Latent Consistency,https://fal.ai/models/fal-ai/fast-lcm-diffusion/api
|
13 |
+
OpenJourney,OpenJourney,847.0543813992838,841.8535302298862,creativeml-openrail-m,PromptHero,https://huggingface.co/prompthero/openjourney
|
14 |
+
LCM,LCM,807.8420276043212,821.876561324668,MIT License,Tsinghua University,https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7
|
|
|
|
|
|
|
|
arena_elo/results/latest/elo_results_t2i_generation.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4cd4df0f34c643b04a068278e6fb02cf6d534a614c76ca1ef0936462de4cf561
|
3 |
+
size 73808
|
arena_elo/results/latest/t2i_generation_leaderboard.csv
CHANGED
@@ -1,18 +1,14 @@
|
|
1 |
key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
LCM(v1.5/XL),LCM(v1.5/XL),912.4100279149377,859.7939054983376,openrail++,Latent Consistency,https://fal.ai/models/fal-ai/fast-lcm-diffusion/api
|
16 |
-
OpenJourney,OpenJourney,835.7176337894391,782.8288973266096,creativeml-openrail-m,PromptHero,https://huggingface.co/prompthero/openjourney
|
17 |
-
LCM,LCM,796.5741829165427,762.9098656374077,MIT License,Tsinghua University,https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7
|
18 |
-
Kolors,Kolors,716.6288722062632,792.3328249865597,N/A,N/A,N/A
|
|
|
1 |
key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
|
2 |
+
PlayGround V2.5,PlayGround V2.5,1150.727295281438,1152.9003848922068,Playground v2.5 Community License,Playground,https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic
|
3 |
+
PlayGround V2,PlayGround V2,1091.4598155811693,1090.650200464496,Playground v2 Community License,Playground,https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic
|
4 |
+
HunyuanDiT,HunyuanDiT,1068.3879352088109,1052.2174359779524,tencent-hunyuan-community,Tencent,https://huggingface.co/Tencent-Hunyuan/HunyuanDiT
|
5 |
+
StableCascade,StableCascade,1054.7995676029338,1059.1108432213439,stable-cascade-nc-community (other),Stability AI,https://huggingface.co/stabilityai/stable-cascade
|
6 |
+
SDXLLightning,SDXLLightning,1041.640169661881,1046.7852182436861,openrail++,ByteDance,https://huggingface.co/ByteDance/SDXL-Lightning
|
7 |
+
PixArtAlpha,PixArtAlpha,1039.5277933053214,1030.1441243333597,openrail++,PixArt-alpha,https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS
|
8 |
+
PixArtSigma,PixArtSigma,1037.9310468633396,1038.2525312486875,openrail++,PixArt-alpha,https://fal.ai/models/fal-ai/pixart-sigma
|
9 |
+
SD3,SD3,1022.5155081381295,1016.3178851896334,stabilityai-nc-research-community,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-3-medium
|
10 |
+
SDXL,SDXL,985.4596811879688,986.0371048218692,openrail++,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
|
11 |
+
SDXLTurbo,SDXLTurbo,928.5870205746824,926.7372919852419,sai-nc-community (other),Stability AI,https://huggingface.co/stabilityai/sdxl-turbo
|
12 |
+
LCM(v1.5/XL),LCM(v1.5/XL),924.0677575907189,918.1692261335113,openrail++,Latent Consistency,https://fal.ai/models/fal-ai/fast-lcm-diffusion/api
|
13 |
+
OpenJourney,OpenJourney,847.0543813992838,841.8535302298862,creativeml-openrail-m,PromptHero,https://huggingface.co/prompthero/openjourney
|
14 |
+
LCM,LCM,807.8420276043212,821.876561324668,MIT License,Tsinghua University,https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7
|
|
|
|
|
|
|
|
serve/leaderboard.py
CHANGED
@@ -148,6 +148,7 @@ def get_arena_table(arena_df, model_table_df):
|
|
148 |
lower_diff = round(arena_df.iloc[i]["rating"] - arena_df.iloc[i]["rating_q025"])
|
149 |
row.append(f"+{upper_diff}/-{lower_diff}")
|
150 |
# num battles
|
|
|
151 |
row.append(round(arena_df.iloc[i]["num_battles"]))
|
152 |
# Organization
|
153 |
row.append(
|
|
|
148 |
lower_diff = round(arena_df.iloc[i]["rating"] - arena_df.iloc[i]["rating_q025"])
|
149 |
row.append(f"+{upper_diff}/-{lower_diff}")
|
150 |
# num battles
|
151 |
+
print(arena_df.iloc[i])
|
152 |
row.append(round(arena_df.iloc[i]["num_battles"]))
|
153 |
# Organization
|
154 |
row.append(
|