DongfuJiang committed on
Commit
e70b763
1 Parent(s): 0334436

Add a filter so that only models with at least 50 votes appear on the leaderboard

Browse files
arena_elo/elo_rating/elo_analysis.py CHANGED
@@ -381,6 +381,7 @@ if __name__ == "__main__":
381
  "--rating-system", type=str, choices=["bt", "elo"], default="bt"
382
  )
383
  parser.add_argument("--exclude-tie", action="store_true", default=False)
 
384
  args = parser.parse_args()
385
 
386
  np.random.seed(42)
@@ -392,7 +393,26 @@ if __name__ == "__main__":
392
  # Read data from all log files
393
  log_files = get_log_files(args.max_num_files)
394
  battles = clean_battle_data(log_files)
395
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
396
  anony_results = report_elo_analysis_results(
397
  battles, rating_system=args.rating_system, num_bootstrap=args.num_bootstrap, anony_only=True
398
  )
 
381
  "--rating-system", type=str, choices=["bt", "elo"], default="bt"
382
  )
383
  parser.add_argument("--exclude-tie", action="store_true", default=False)
384
+ parser.add_argument("--min_num_battles_per_model", type=int, default=50)
385
  args = parser.parse_args()
386
 
387
  np.random.seed(42)
 
393
  # Read data from all log files
394
  log_files = get_log_files(args.max_num_files)
395
  battles = clean_battle_data(log_files)
396
+
397
+ if args.min_num_battles_per_model:
398
+ num_battles_per_model = defaultdict(int)
399
+ # use pd
400
+ for _, battle in battles.iterrows():
401
+ num_battles_per_model[battle["model_a"]] += 1
402
+ num_battles_per_model[battle["model_b"]] += 1
403
+ to_remove_models = [
404
+ model for model, num_battles in num_battles_per_model.items() if num_battles < args.min_num_battles_per_model
405
+ ]
406
+ battles_with_enough_battles = battles[
407
+ ~battles["model_a"].isin(to_remove_models) & ~battles["model_b"].isin(to_remove_models)
408
+ ]
409
+ # battles_with_enough_battles = [
410
+ # battle for battle in battles if battle["model_a"] not in to_remove_models and battle["model_b"] not in to_remove_models
411
+ # ]
412
+ print(f"Remove models with less than {args.min_num_battles_per_model} battles: {to_remove_models}")
413
+ print(f"Number of battles: {len(battles)} -> {len(battles_with_enough_battles)}")
414
+ battles = battles_with_enough_battles
415
+
416
  anony_results = report_elo_analysis_results(
417
  battles, rating_system=args.rating_system, num_bootstrap=args.num_bootstrap, anony_only=True
418
  )
arena_elo/results/20240808/elo_results_t2i_generation.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a0bc5842a13e68e43e47f8713a71a952307787344205b60f952aefc58f7b7dc
3
- size 85772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cd4df0f34c643b04a068278e6fb02cf6d534a614c76ca1ef0936462de4cf561
3
+ size 73808
arena_elo/results/20240808/t2i_generation_leaderboard.csv CHANGED
@@ -1,18 +1,14 @@
1
  key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
2
- FluxTimestep,FluxTimestep,1214.8405410597163,1268.8081253338864,N/A,N/A,N/A
3
- FluxGuidance,FluxGuidance,1171.9302438783548,1280.7274155318826,N/A,N/A,N/A
4
- PlayGround V2.5,PlayGround V2.5,1139.5104041681875,1093.8755129403712,Playground v2.5 Community License,Playground,https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic
5
- PlayGround V2,PlayGround V2,1080.267768035212,1031.711789812054,Playground v2 Community License,Playground,https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic
6
- HunyuanDiT,HunyuanDiT,1057.3948978511037,993.5300592549087,tencent-hunyuan-community,Tencent,https://huggingface.co/Tencent-Hunyuan/HunyuanDiT
7
- StableCascade,StableCascade,1043.5554301408906,1000.1825546307998,stable-cascade-nc-community (other),Stability AI,https://huggingface.co/stabilityai/stable-cascade
8
- AuraFlow,AuraFlow,1043.0774152314,905.4155674173782,N/A,N/A,N/A
9
- SDXLLightning,SDXLLightning,1030.176037263112,987.7949247755674,openrail++,ByteDance,https://huggingface.co/ByteDance/SDXL-Lightning
10
- PixArtAlpha,PixArtAlpha,1028.2881123547797,971.2154365327415,openrail++,PixArt-alpha,https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS
11
- PixArtSigma,PixArtSigma,1026.6892332008106,979.1841654725179,openrail++,PixArt-alpha,https://fal.ai/models/fal-ai/pixart-sigma
12
- SD3,SD3,1011.7515067036493,956.7215604444668,stabilityai-nc-research-community,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-3-medium
13
- SDXL,SDXL,974.0626501167529,926.9049335091465,openrail++,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
14
- SDXLTurbo,SDXLTurbo,917.1250431688785,867.9511006927216,sai-nc-community (other),Stability AI,https://huggingface.co/stabilityai/sdxl-turbo
15
- LCM(v1.5/XL),LCM(v1.5/XL),912.4100279149377,859.7939054983376,openrail++,Latent Consistency,https://fal.ai/models/fal-ai/fast-lcm-diffusion/api
16
- OpenJourney,OpenJourney,835.7176337894391,782.8288973266096,creativeml-openrail-m,PromptHero,https://huggingface.co/prompthero/openjourney
17
- LCM,LCM,796.5741829165427,762.9098656374077,MIT License,Tsinghua University,https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7
18
- Kolors,Kolors,716.6288722062632,792.3328249865597,N/A,N/A,N/A
 
1
  key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
2
+ PlayGround V2.5,PlayGround V2.5,1150.727295281438,1152.9003848922068,Playground v2.5 Community License,Playground,https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic
3
+ PlayGround V2,PlayGround V2,1091.4598155811693,1090.650200464496,Playground v2 Community License,Playground,https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic
4
+ HunyuanDiT,HunyuanDiT,1068.3879352088109,1052.2174359779524,tencent-hunyuan-community,Tencent,https://huggingface.co/Tencent-Hunyuan/HunyuanDiT
5
+ StableCascade,StableCascade,1054.7995676029338,1059.1108432213439,stable-cascade-nc-community (other),Stability AI,https://huggingface.co/stabilityai/stable-cascade
6
+ SDXLLightning,SDXLLightning,1041.640169661881,1046.7852182436861,openrail++,ByteDance,https://huggingface.co/ByteDance/SDXL-Lightning
7
+ PixArtAlpha,PixArtAlpha,1039.5277933053214,1030.1441243333597,openrail++,PixArt-alpha,https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS
8
+ PixArtSigma,PixArtSigma,1037.9310468633396,1038.2525312486875,openrail++,PixArt-alpha,https://fal.ai/models/fal-ai/pixart-sigma
9
+ SD3,SD3,1022.5155081381295,1016.3178851896334,stabilityai-nc-research-community,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-3-medium
10
+ SDXL,SDXL,985.4596811879688,986.0371048218692,openrail++,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
11
+ SDXLTurbo,SDXLTurbo,928.5870205746824,926.7372919852419,sai-nc-community (other),Stability AI,https://huggingface.co/stabilityai/sdxl-turbo
12
+ LCM(v1.5/XL),LCM(v1.5/XL),924.0677575907189,918.1692261335113,openrail++,Latent Consistency,https://fal.ai/models/fal-ai/fast-lcm-diffusion/api
13
+ OpenJourney,OpenJourney,847.0543813992838,841.8535302298862,creativeml-openrail-m,PromptHero,https://huggingface.co/prompthero/openjourney
14
+ LCM,LCM,807.8420276043212,821.876561324668,MIT License,Tsinghua University,https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7
 
 
 
 
arena_elo/results/latest/elo_results_t2i_generation.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a0bc5842a13e68e43e47f8713a71a952307787344205b60f952aefc58f7b7dc
3
- size 85772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cd4df0f34c643b04a068278e6fb02cf6d534a614c76ca1ef0936462de4cf561
3
+ size 73808
arena_elo/results/latest/t2i_generation_leaderboard.csv CHANGED
@@ -1,18 +1,14 @@
1
  key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
2
- FluxTimestep,FluxTimestep,1214.8405410597163,1268.8081253338864,N/A,N/A,N/A
3
- FluxGuidance,FluxGuidance,1171.9302438783548,1280.7274155318826,N/A,N/A,N/A
4
- PlayGround V2.5,PlayGround V2.5,1139.5104041681875,1093.8755129403712,Playground v2.5 Community License,Playground,https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic
5
- PlayGround V2,PlayGround V2,1080.267768035212,1031.711789812054,Playground v2 Community License,Playground,https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic
6
- HunyuanDiT,HunyuanDiT,1057.3948978511037,993.5300592549087,tencent-hunyuan-community,Tencent,https://huggingface.co/Tencent-Hunyuan/HunyuanDiT
7
- StableCascade,StableCascade,1043.5554301408906,1000.1825546307998,stable-cascade-nc-community (other),Stability AI,https://huggingface.co/stabilityai/stable-cascade
8
- AuraFlow,AuraFlow,1043.0774152314,905.4155674173782,N/A,N/A,N/A
9
- SDXLLightning,SDXLLightning,1030.176037263112,987.7949247755674,openrail++,ByteDance,https://huggingface.co/ByteDance/SDXL-Lightning
10
- PixArtAlpha,PixArtAlpha,1028.2881123547797,971.2154365327415,openrail++,PixArt-alpha,https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS
11
- PixArtSigma,PixArtSigma,1026.6892332008106,979.1841654725179,openrail++,PixArt-alpha,https://fal.ai/models/fal-ai/pixart-sigma
12
- SD3,SD3,1011.7515067036493,956.7215604444668,stabilityai-nc-research-community,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-3-medium
13
- SDXL,SDXL,974.0626501167529,926.9049335091465,openrail++,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
14
- SDXLTurbo,SDXLTurbo,917.1250431688785,867.9511006927216,sai-nc-community (other),Stability AI,https://huggingface.co/stabilityai/sdxl-turbo
15
- LCM(v1.5/XL),LCM(v1.5/XL),912.4100279149377,859.7939054983376,openrail++,Latent Consistency,https://fal.ai/models/fal-ai/fast-lcm-diffusion/api
16
- OpenJourney,OpenJourney,835.7176337894391,782.8288973266096,creativeml-openrail-m,PromptHero,https://huggingface.co/prompthero/openjourney
17
- LCM,LCM,796.5741829165427,762.9098656374077,MIT License,Tsinghua University,https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7
18
- Kolors,Kolors,716.6288722062632,792.3328249865597,N/A,N/A,N/A
 
1
  key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
2
+ PlayGround V2.5,PlayGround V2.5,1150.727295281438,1152.9003848922068,Playground v2.5 Community License,Playground,https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic
3
+ PlayGround V2,PlayGround V2,1091.4598155811693,1090.650200464496,Playground v2 Community License,Playground,https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic
4
+ HunyuanDiT,HunyuanDiT,1068.3879352088109,1052.2174359779524,tencent-hunyuan-community,Tencent,https://huggingface.co/Tencent-Hunyuan/HunyuanDiT
5
+ StableCascade,StableCascade,1054.7995676029338,1059.1108432213439,stable-cascade-nc-community (other),Stability AI,https://huggingface.co/stabilityai/stable-cascade
6
+ SDXLLightning,SDXLLightning,1041.640169661881,1046.7852182436861,openrail++,ByteDance,https://huggingface.co/ByteDance/SDXL-Lightning
7
+ PixArtAlpha,PixArtAlpha,1039.5277933053214,1030.1441243333597,openrail++,PixArt-alpha,https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS
8
+ PixArtSigma,PixArtSigma,1037.9310468633396,1038.2525312486875,openrail++,PixArt-alpha,https://fal.ai/models/fal-ai/pixart-sigma
9
+ SD3,SD3,1022.5155081381295,1016.3178851896334,stabilityai-nc-research-community,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-3-medium
10
+ SDXL,SDXL,985.4596811879688,986.0371048218692,openrail++,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
11
+ SDXLTurbo,SDXLTurbo,928.5870205746824,926.7372919852419,sai-nc-community (other),Stability AI,https://huggingface.co/stabilityai/sdxl-turbo
12
+ LCM(v1.5/XL),LCM(v1.5/XL),924.0677575907189,918.1692261335113,openrail++,Latent Consistency,https://fal.ai/models/fal-ai/fast-lcm-diffusion/api
13
+ OpenJourney,OpenJourney,847.0543813992838,841.8535302298862,creativeml-openrail-m,PromptHero,https://huggingface.co/prompthero/openjourney
14
+ LCM,LCM,807.8420276043212,821.876561324668,MIT License,Tsinghua University,https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7
 
 
 
 
serve/leaderboard.py CHANGED
@@ -148,6 +148,7 @@ def get_arena_table(arena_df, model_table_df):
148
  lower_diff = round(arena_df.iloc[i]["rating"] - arena_df.iloc[i]["rating_q025"])
149
  row.append(f"+{upper_diff}/-{lower_diff}")
150
  # num battles
 
151
  row.append(round(arena_df.iloc[i]["num_battles"]))
152
  # Organization
153
  row.append(
 
148
  lower_diff = round(arena_df.iloc[i]["rating"] - arena_df.iloc[i]["rating_q025"])
149
  row.append(f"+{upper_diff}/-{lower_diff}")
150
  # num battles
151
+ print(arena_df.iloc[i])
152
  row.append(round(arena_df.iloc[i]["num_battles"]))
153
  # Organization
154
  row.append(