This view is limited to 50 files because it contains too many changes.  See the raw diff here.
Files changed (50) hide show
  1. .gitignore +0 -175
  2. .gitmodules +3 -0
  3. .idea/.gitignore +0 -8
  4. .idea/GenAI-Arena.iml +0 -15
  5. .idea/inspectionProfiles/profiles_settings.xml +0 -6
  6. .idea/modules.xml +0 -8
  7. .idea/vcs.xml +0 -6
  8. README.md +3 -37
  9. app.py +9 -27
  10. arena_elo/edition_model_info.json +37 -0
  11. arena_elo/elo_rating/clean_battle_data.py +134 -131
  12. arena_elo/elo_rating/elo_analysis.py +5 -40
  13. arena_elo/elo_rating/generate_leaderboard.py +17 -14
  14. arena_elo/elo_rating/model_registry.py +578 -0
  15. arena_elo/elo_rating/upload_battle_data.py +122 -97
  16. arena_elo/elo_rating/utils.py +4 -12
  17. arena_elo/generation_model_info.json +42 -0
  18. arena_elo/results/20240315/elo_results_image_editing.pkl +2 -2
  19. arena_elo/results/20240327/clean_battle_t2i_generation.json +0 -0
  20. arena_elo/results/20240327/elo_results_t2i_generation.pkl +2 -2
  21. arena_elo/results/20240327/t2i_generation_leaderboard.csv +10 -9
  22. arena_elo/results/20240328/clean_battle_image_editing.json +0 -890
  23. arena_elo/results/20240328/elo_results_image_editing.pkl +0 -3
  24. arena_elo/results/20240328/image_editing_leaderboard.csv +0 -8
  25. arena_elo/results/20240330/elo_results_t2i_generation.pkl +0 -3
  26. arena_elo/results/20240330/t2i_generation_leaderboard.csv +0 -10
  27. arena_elo/results/20240408/clean_battle_t2i_generation.json +0 -0
  28. arena_elo/results/20240408/elo_results_t2i_generation.pkl +0 -3
  29. arena_elo/results/20240408/t2i_generation_leaderboard.csv +0 -10
  30. arena_elo/results/20240411/clean_battle_image_editing.json +0 -906
  31. arena_elo/results/20240411/clean_battle_t2i_generation.json +0 -0
  32. arena_elo/results/20240411/elo_results_image_editing.pkl +0 -3
  33. arena_elo/results/20240411/elo_results_t2i_generation.pkl +0 -3
  34. arena_elo/results/20240411/image_editing_leaderboard.csv +0 -8
  35. arena_elo/results/20240411/t2i_generation_leaderboard.csv +0 -10
  36. arena_elo/results/20240428/elo_results_image_editing.pkl +0 -3
  37. arena_elo/results/20240428/image_editing_leaderboard.csv +0 -8
  38. arena_elo/results/20240501/clean_battle_t2i_generation.json +0 -0
  39. arena_elo/results/20240501/elo_results_t2i_generation.pkl +0 -3
  40. arena_elo/results/20240501/t2i_generation_leaderboard.csv +0 -11
  41. arena_elo/results/20240516/clean_battle_image_editing.json +0 -1578
  42. arena_elo/results/20240516/elo_results_image_editing.pkl +0 -3
  43. arena_elo/results/20240516/image_editing_leaderboard.csv +0 -10
  44. arena_elo/results/20240517/clean_battle_t2i_generation.json +0 -0
  45. arena_elo/results/20240517/elo_results_t2i_generation.pkl +0 -3
  46. arena_elo/results/20240517/t2i_generation_leaderboard.csv +0 -12
  47. arena_elo/results/20240525/clean_battle_image_editing.json +0 -0
  48. arena_elo/results/20240525/clean_battle_t2i_generation.json +0 -0
  49. arena_elo/results/20240525/elo_results_image_editing.pkl +0 -3
  50. arena_elo/results/20240525/elo_results_t2i_generation.pkl +0 -3
.gitignore DELETED
@@ -1,175 +0,0 @@
1
- checkpoints/
2
-
3
- # Byte-compiled / optimized / DLL files
4
- __pycache__/
5
- *.py[cod]
6
- *$py.class
7
- src/
8
- # C extensions
9
- *.so
10
- temp
11
-
12
- # Distribution / packaging
13
- .Python
14
- build/
15
- develop-eggs/
16
- dist/
17
- downloads/
18
- eggs/
19
- .eggs/
20
- lib/
21
- lib64/
22
- parts/
23
- sdist/
24
- var/
25
- wheels/
26
- share/python-wheels/
27
- *.egg-info/
28
- .installed.cfg
29
- *.egg
30
- MANIFEST
31
-
32
- # PyInstaller
33
- # Usually these files are written by a python script from a template
34
- # before PyInstaller builds the exe, so as to inject date/other infos into it.
35
- *.manifest
36
- *.spec
37
-
38
- # Installer logs
39
- pip-log.txt
40
- pip-delete-this-directory.txt
41
-
42
- # Unit test / coverage reports
43
- htmlcov/
44
- .tox/
45
- .nox/
46
- .coverage
47
- .coverage.*
48
- .cache
49
- nosetests.xml
50
- coverage.xml
51
- *.cover
52
- *.py,cover
53
- .hypothesis/
54
- .pytest_cache/
55
- cover/
56
-
57
- # Translations
58
- *.mo
59
- *.pot
60
-
61
- # Django stuff:
62
- *.log
63
- local_settings.py
64
- db.sqlite3
65
- db.sqlite3-journal
66
-
67
- # Flask stuff:
68
- instance/
69
- .webassets-cache
70
-
71
- # Scrapy stuff:
72
- .scrapy
73
-
74
- # Sphinx documentation
75
- docs/_build/
76
-
77
- # PyBuilder
78
- .pybuilder/
79
- target/
80
-
81
- # Jupyter Notebook
82
- .ipynb_checkpoints
83
-
84
- # IPython
85
- profile_default/
86
- ipython_config.py
87
-
88
- # pyenv
89
- # For a library or package, you might want to ignore these files since the code is
90
- # intended to run in multiple environments; otherwise, check them in:
91
- # .python-version
92
-
93
- # pipenv
94
- # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95
- # However, in case of collaboration, if having platform-specific dependencies or dependencies
96
- # having no cross-platform support, pipenv may install dependencies that don't work, or not
97
- # install all needed dependencies.
98
- #Pipfile.lock
99
-
100
- # poetry
101
- # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
102
- # This is especially recommended for binary packages to ensure reproducibility, and is more
103
- # commonly ignored for libraries.
104
- # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
105
- #poetry.lock
106
-
107
- # pdm
108
- # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
109
- #pdm.lock
110
- # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
111
- # in version control.
112
- # https://pdm.fming.dev/#use-with-ide
113
- .pdm.toml
114
-
115
- # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
116
- __pypackages__/
117
-
118
- # Celery stuff
119
- celerybeat-schedule
120
- celerybeat.pid
121
-
122
- # SageMath parsed files
123
- *.sage.py
124
-
125
- # Environments
126
- .env
127
- .venv
128
- env/
129
- venv/
130
- ENV/
131
- env.bak/
132
- venv.bak/
133
-
134
- # Spyder project settings
135
- .spyderproject
136
- .spyproject
137
-
138
- # Rope project settings
139
- .ropeproject
140
-
141
- # mkdocs documentation
142
- /site
143
-
144
- # mypy
145
- .mypy_cache/
146
- .dmypy.json
147
- dmypy.json
148
-
149
- # Pyre type checker
150
- .pyre/
151
-
152
- # pytype static type analyzer
153
- .pytype/
154
-
155
- # Cython debug symbols
156
- cython_debug/
157
-
158
- # PyCharm
159
- # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
160
- # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
161
- # and can be added to the global gitignore or merged into this file. For a more nuclear
162
- # option (not recommended) you can uncomment the following to ignore the entire idea folder.
163
- #.idea/
164
- /tmp
165
- /logs
166
- /*.json
167
- /*.jpg
168
- /*.ipynb
169
- /GenAI-Arena-hf-logs
170
- /3DGen-Arena-logs*
171
- /tmp*
172
- /arena_elo/results/**/*.jpg
173
- /arena_elo/results/**/*.png
174
- /arena_elo/6_04_log_results
175
- /arena_elo/update_elo_rating_6_04.sh
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitmodules CHANGED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [submodule "GenAI-Arena-hf-logs"]
2
+ path = GenAI-Arena-hf-logs
3
+ url = https://github.com/jdf-prog/GenAI-Arena-hf-logs.git
.idea/.gitignore DELETED
@@ -1,8 +0,0 @@
1
- # Default ignored files
2
- /shelf/
3
- /workspace.xml
4
- # Editor-based HTTP Client requests
5
- /httpRequests/
6
- # Datasource local storage ignored files
7
- /dataSources/
8
- /dataSources.local.xml
 
 
 
 
 
 
 
 
 
.idea/GenAI-Arena.iml DELETED
@@ -1,15 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <module type="PYTHON_MODULE" version="4">
3
- <component name="NewModuleRootManager">
4
- <content url="file://$MODULE_DIR$" />
5
- <orderEntry type="inheritedJdk" />
6
- <orderEntry type="sourceFolder" forTests="false" />
7
- </component>
8
- <component name="PyDocumentationSettings">
9
- <option name="format" value="GOOGLE" />
10
- <option name="myDocStringFormat" value="Google" />
11
- </component>
12
- <component name="TemplatesService">
13
- <option name="TEMPLATE_CONFIGURATION" value="Jinja2" />
14
- </component>
15
- </module>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.idea/inspectionProfiles/profiles_settings.xml DELETED
@@ -1,6 +0,0 @@
1
- <component name="InspectionProjectProfileManager">
2
- <settings>
3
- <option name="USE_PROJECT_PROFILE" value="false" />
4
- <version value="1.0" />
5
- </settings>
6
- </component>
 
 
 
 
 
 
 
.idea/modules.xml DELETED
@@ -1,8 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project version="4">
3
- <component name="ProjectModuleManager">
4
- <modules>
5
- <module fileurl="file://$PROJECT_DIR$/.idea/GenAI-Arena.iml" filepath="$PROJECT_DIR$/.idea/GenAI-Arena.iml" />
6
- </modules>
7
- </component>
8
- </project>
 
 
 
 
 
 
 
 
 
.idea/vcs.xml DELETED
@@ -1,6 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project version="4">
3
- <component name="VcsDirectoryMappings">
4
- <mapping directory="" vcs="Git" />
5
- </component>
6
- </project>
 
 
 
 
 
 
 
README.md CHANGED
@@ -4,44 +4,10 @@ emoji: 📈
4
  colorFrom: purple
5
  colorTo: pink
6
  sdk: gradio
7
- sdk_version: 4.41.0
8
- python_version: 3.12
9
  app_file: app.py
10
- pinned: true
11
  license: mit
12
- tags:
13
- - arena
14
- - leaderboard
15
- short_description: Realtime Image/Video Gen AI Arena
16
  ---
17
 
18
- ## Installation
19
-
20
- - for cuda 11.8
21
- ```bash
22
- conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
23
- pip3 install -U xformers --index-url https://download.pytorch.org/whl/cu118
24
- pip install -r requirements.txt
25
- ```
26
- - for cuda 12.1
27
- ```bash
28
- conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
29
- pip install -r requirements.txt
30
- ```
31
-
32
- ## Start Hugging Face UI
33
- ```bash
34
- python app.py
35
- ```
36
-
37
- ## Start Log server
38
- ```bash
39
- uvicorn serve.log_server:app --reload --port 22005 --host 0.0.0.0
40
- ```
41
-
42
- ## Update leaderboard
43
- ```bash
44
- cd arena_elo && bash update_leaderboard.sh
45
- ```
46
-
47
- Paper: arxiv.org/abs/2406.04485
 
4
  colorFrom: purple
5
  colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 4.21.0
 
8
  app_file: app.py
9
+ pinned: false
10
  license: mit
 
 
 
 
11
  ---
12
 
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -2,7 +2,6 @@ import gradio as gr
2
  import os
3
  from serve.gradio_web import *
4
  from serve.gradio_web_image_editing import *
5
- from serve.gradio_web_video_generation import *
6
  from serve.leaderboard import build_leaderboard_tab
7
  from model.model_manager import ModelManager
8
  from pathlib import Path
@@ -24,11 +23,13 @@ def build_combine_demo(models, elo_results_file, leaderboard_table_file):
24
  with gr.Tab("Generation Arena (side-by-side)", id=1):
25
  build_side_by_side_ui_named(models)
26
 
27
- with gr.Tab("Generation Playground", id=2): #Direct Chat
28
  build_single_model_ui(models, add_promotion_links=True)
29
  if elo_results_file:
30
  with gr.Tab("Generation Leaderboard", id=3):
31
  build_leaderboard_tab(elo_results_file['t2i_generation'], leaderboard_table_file['t2i_generation'])
 
 
32
 
33
  with gr.Tab("Image Edition", id=5):
34
  with gr.Tabs() as tabs_ie:
@@ -38,27 +39,13 @@ def build_combine_demo(models, elo_results_file, leaderboard_table_file):
38
  with gr.Tab("Edition Arena (side-by-side)", id=6):
39
  build_side_by_side_ui_named_ie(models)
40
 
41
- with gr.Tab("Edition Playground", id=7): #Direct Chat
42
  build_single_model_ui_ie(models, add_promotion_links=True)
43
  if elo_results_file:
44
  with gr.Tab("Edition Leaderboard", id=8):
45
  build_leaderboard_tab(elo_results_file['image_editing'], leaderboard_table_file['image_editing'])
46
-
47
- with gr.Tab("Video Generation", id=10):
48
- with gr.Tabs() as tabs_vg:
49
- with gr.Tab("Video Generation Arena (battle)", id=10):
50
- build_side_by_side_ui_anony_vg(models)
51
-
52
- with gr.Tab("Video Generation Arena (side-by-side)", id=11):
53
- build_side_by_side_ui_named_vg(models)
54
-
55
- with gr.Tab("Video Generation Playground", id=12): #Direct Chat
56
- build_single_model_ui_vg(models, add_promotion_links=True)
57
- if elo_results_file and 'video_generation' in elo_results_file:
58
- with gr.Tab("Video Generation Leaderboard", id=13):
59
- build_leaderboard_tab(elo_results_file['video_generation'], leaderboard_table_file['video_generation'])
60
- with gr.Tab("About Us", id=4):
61
- build_about()
62
 
63
  return demo
64
 
@@ -76,8 +63,6 @@ def load_elo_results(elo_results_dir):
76
  elo_results_file['t2i_generation'] = file
77
  elif 'image_editing' in file.name:
78
  elo_results_file['image_editing'] = file
79
- elif 'video_generation' in file.name:
80
- elo_results_file['video_generation'] = file
81
  else:
82
  raise ValueError(f"Unknown file name: {file.name}")
83
  for file in elo_results_dir.glob('*_leaderboard.csv'):
@@ -85,20 +70,17 @@ def load_elo_results(elo_results_dir):
85
  leaderboard_table_file['t2i_generation'] = file
86
  elif 'image_editing' in file.name:
87
  leaderboard_table_file['image_editing'] = file
88
- elif 'video_generation' in file.name:
89
- leaderboard_table_file['video_generation'] = file
90
  else:
91
  raise ValueError(f"Unknown file name: {file.name}")
92
 
93
  return elo_results_file, leaderboard_table_file
94
 
95
  if __name__ == "__main__":
96
- server_port = int(SERVER_PORT)
97
  root_path = ROOT_PATH
98
  elo_results_dir = ELO_RESULTS_DIR
99
- models = ModelManager(enable_nsfw=False, do_pre_download=True, do_debug_packages=True)
100
- # models = ModelManager(enable_nsfw=False, do_pre_download=False, do_debug_packages=False)
101
 
102
  elo_results_file, leaderboard_table_file = load_elo_results(elo_results_dir)
103
  demo = build_combine_demo(models, elo_results_file, leaderboard_table_file)
104
- demo.queue(max_size=20).launch(server_port=server_port, root_path=ROOT_PATH)
 
2
  import os
3
  from serve.gradio_web import *
4
  from serve.gradio_web_image_editing import *
 
5
  from serve.leaderboard import build_leaderboard_tab
6
  from model.model_manager import ModelManager
7
  from pathlib import Path
 
23
  with gr.Tab("Generation Arena (side-by-side)", id=1):
24
  build_side_by_side_ui_named(models)
25
 
26
+ with gr.Tab("Generation Direct Chat", id=2):
27
  build_single_model_ui(models, add_promotion_links=True)
28
  if elo_results_file:
29
  with gr.Tab("Generation Leaderboard", id=3):
30
  build_leaderboard_tab(elo_results_file['t2i_generation'], leaderboard_table_file['t2i_generation'])
31
+ with gr.Tab("About Us", id=4):
32
+ build_about()
33
 
34
  with gr.Tab("Image Edition", id=5):
35
  with gr.Tabs() as tabs_ie:
 
39
  with gr.Tab("Edition Arena (side-by-side)", id=6):
40
  build_side_by_side_ui_named_ie(models)
41
 
42
+ with gr.Tab("Edition Direct Chat", id=7):
43
  build_single_model_ui_ie(models, add_promotion_links=True)
44
  if elo_results_file:
45
  with gr.Tab("Edition Leaderboard", id=8):
46
  build_leaderboard_tab(elo_results_file['image_editing'], leaderboard_table_file['image_editing'])
47
+ with gr.Tab("About Us", id=9):
48
+ build_about()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  return demo
51
 
 
63
  elo_results_file['t2i_generation'] = file
64
  elif 'image_editing' in file.name:
65
  elo_results_file['image_editing'] = file
 
 
66
  else:
67
  raise ValueError(f"Unknown file name: {file.name}")
68
  for file in elo_results_dir.glob('*_leaderboard.csv'):
 
70
  leaderboard_table_file['t2i_generation'] = file
71
  elif 'image_editing' in file.name:
72
  leaderboard_table_file['image_editing'] = file
 
 
73
  else:
74
  raise ValueError(f"Unknown file name: {file.name}")
75
 
76
  return elo_results_file, leaderboard_table_file
77
 
78
  if __name__ == "__main__":
79
+ server_port = SERVER_PORT
80
  root_path = ROOT_PATH
81
  elo_results_dir = ELO_RESULTS_DIR
82
+ models = ModelManager()
 
83
 
84
  elo_results_file, leaderboard_table_file = load_elo_results(elo_results_dir)
85
  demo = build_combine_demo(models, elo_results_file, leaderboard_table_file)
86
+ demo.queue(max_size=20).launch(server_port=server_port, root_path=ROOT_PATH)
arena_elo/edition_model_info.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "CycleDiffusion": {
3
+ "Link": "https://github.com/ChenWu98/cycle-diffusion",
4
+ "License": "X11",
5
+ "Organization": "Carnegie Mellon University"
6
+ },
7
+ "PNP": {
8
+ "Link": "https://github.com/MichalGeyer/plug-and-play",
9
+ "License": "-",
10
+ "Organization": "Weizmann Institute of Science"
11
+ },
12
+ "InstructPix2Pix": {
13
+ "Link": "https://www.timothybrooks.com/instruct-pix2pix",
14
+ "License": "Copyright 2023 Timothy Brooks, Aleksander Holynski, Alexei A. Efros",
15
+ "Organization": "University of California, Berkeley"
16
+ },
17
+ "Pix2PixZero": {
18
+ "Link": "https://pix2pixzero.github.io",
19
+ "License": "MIT License",
20
+ "Organization": "Carnegie Mellon University, Adobe Research"
21
+ },
22
+ "MagicBrush": {
23
+ "Link": "https://osu-nlp-group.github.io/MagicBrush",
24
+ "License": "CC-BY-4.0",
25
+ "Organization": "The Ohio State University, University of Waterloo"
26
+ },
27
+ "Prompt2prompt": {
28
+ "Link": "https://prompt-to-prompt.github.io",
29
+ "License": "Apache-2.0",
30
+ "Organization": "Google, Tel Aviv University"
31
+ },
32
+ "SDEdit": {
33
+ "Link": "https://sde-image-editing.github.io",
34
+ "License": "MIT License",
35
+ "Organization": "Stanford University"
36
+ }
37
+ }
arena_elo/elo_rating/clean_battle_data.py CHANGED
@@ -18,13 +18,46 @@ ImageFile.LOAD_TRUNCATED_IMAGES = True
18
  from tqdm import tqdm
19
 
20
  from .basic_stats import get_log_files, NUM_SERVERS, LOG_ROOT_DIR
21
- from .utils import detect_language, get_time_stamp_from_date, get_model_info
22
 
23
  VOTES = ["tievote", "leftvote", "rightvote", "bothbad_vote"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
- def parse_model_name(model_name):
26
- return NotImplementedError()
27
- return model_source, model_name, model_type
28
 
29
  def remove_html(raw):
30
  if raw.startswith("<h3>"):
@@ -44,19 +77,19 @@ def to_openai_format(messages):
44
 
45
  def replace_model_name(old_name, tstamp):
46
  replace_dict = {
47
- "PlayGroundV2": "PlayGround V2",
48
- "PlayGroundV2.5": "PlayGround V2.5",
49
- "FluxTimestep": "FLUX1schnell",
50
- "FluxGuidance": "FLUX1dev",
51
- "CogVideoX": "CogVideoX-2B"
52
  }
 
 
 
 
 
53
  if old_name in replace_dict:
54
- old_name = replace_dict[old_name]
55
- if "Flux" in old_name:
56
- print(f"Invalid model names: {old_name}")
57
- exit(1)
58
- model_info = get_model_info(old_name)
59
- old_name = model_info.simple_name
60
  return old_name
61
 
62
 
@@ -72,27 +105,18 @@ def read_file(filename):
72
  break
73
  except FileNotFoundError:
74
  time.sleep(2)
75
- except json.JSONDecodeError:
76
- print(f"Error in reading {filename}")
77
- print(row)
78
- exit(0)
79
  return data
80
 
81
 
82
  def read_file_parallel(log_files, num_threads=16):
83
  data_all = []
84
- if num_threads == 1:
85
- for log_file in tqdm(log_files, desc="Reading"):
86
- data_all.extend(read_file(log_file))
87
- return data_all
88
- else:
89
- from multiprocessing import Pool
90
-
91
- with Pool(num_threads) as p:
92
- ret_all = list(tqdm(p.imap(read_file, log_files), total=len(log_files)))
93
- for ret in ret_all:
94
- data_all.extend(ret)
95
- return data_all
96
 
97
  def load_image(image_path):
98
  try:
@@ -103,7 +127,7 @@ def load_image(image_path):
103
  def clean_battle_data(
104
  log_files, exclude_model_names, ban_ip_list=None, sanitize_ip=False, mode="simple", task_name="image_editing"
105
  ):
106
- data = read_file_parallel(log_files, num_threads=1)
107
 
108
  convert_type = {
109
  "leftvote": "model_a",
@@ -121,7 +145,6 @@ def clean_battle_data(
121
  battles = []
122
  for row in tqdm(data, desc="Cleaning"):
123
  if row["models"][0] is None or row["models"][1] is None:
124
- print(f"Invalid model names: {row['models']}")
125
  continue
126
 
127
  # Resolve model names
@@ -140,7 +163,6 @@ def clean_battle_data(
140
  models_public[1] == "" and models_public[0] != ""
141
  ):
142
  ct_invalid += 1
143
- print(f"Invalid model names: {models_public}")
144
  continue
145
 
146
  if models_public[0] == "" or models_public[0] == "Model A":
@@ -151,82 +173,71 @@ def clean_battle_data(
151
  anony = False
152
  models = models_public
153
  if not models_public == models_hidden:
154
- print(f"Model names mismatch: {models_public} vs {models_hidden}")
155
  ct_invalid += 1
156
  continue
157
-
158
- def preprocess_model_name(m):
159
- if m == "Playground v2":
160
- return 'playground_PlayGroundV2_generation'
161
- if m == "Playground v2.5":
162
- return 'playground_PlayGroundV2.5_generation'
163
- return m
164
- models = [preprocess_model_name(m) for m in models]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
  # Replace bard with palm
 
167
  if task_name == "image_editing":
168
- valid = True
169
- for _model in models:
170
- try:
171
- platform, model_name, task = _model.split("_")
172
- except ValueError:
173
- valid = False
174
- break
175
- if not (platform in ["playground", "imagenhub"] and task == "edition"):
176
- valid = False
177
- break
178
- if not valid:
179
  ct_invalid += 1
180
  continue
181
- for i, _model in enumerate(models):
182
- platform, model_name, task = _model.split("_")
183
- models[i] = model_name
184
-
185
  elif task_name == "t2i_generation":
186
- valid = True
187
- for _model in models:
188
- try:
189
- platform, model_name, task = _model.split("_")
190
- except ValueError:
191
- valid = False
192
- break
193
- if not (platform.lower() in ["playground", "imagenhub", 'fal'] and (task == "generation" or task == "text2image")):
194
- valid = False
195
- break
196
- if not valid:
197
  ct_invalid += 1
198
  continue
199
- for i, _model in enumerate(models):
200
- platform, model_name, task = _model.split("_")
201
- models[i] = model_name
202
-
203
- elif task_name == "video_generation":
204
- valid = True
205
- for _model in models:
206
- try:
207
- platform, model_name, task = _model.split("_")
208
- except ValueError:
209
- valid = False
210
- break
211
- if not (platform in ["videogenhub", "fal"] and task == "generation" or task == "text2video"):
212
- valid = False
213
- break
214
- if not valid:
215
- ct_invalid += 1
216
- continue
217
- for i, _model in enumerate(models):
218
- platform, model_name, task = _model.split("_")
219
- models[i] = model_name
220
 
221
  else:
222
  raise ValueError(f"Invalid task_name: {task_name}")
223
 
224
- models = [replace_model_name(m, row["tstamp"]) for m in models]
225
-
226
  # Exclude certain models
227
  if exclude_model_names and any(x in exclude_model_names for x in models):
228
  ct_invalid += 1
229
  continue
 
 
 
 
 
 
 
 
 
 
 
230
 
231
  if mode == "conv_release":
232
  # assert the two images are the same
@@ -251,6 +262,14 @@ def clean_battle_data(
251
  continue
252
 
253
 
 
 
 
 
 
 
 
 
254
  ip = row["ip"]
255
  if ip not in all_ips:
256
  all_ips[ip] = {"ip": ip, "count": 0, "sanitized_id": len(all_ips)}
@@ -262,45 +281,21 @@ def clean_battle_data(
262
 
263
  if ban_ip_list is not None and ip in ban_ip_list:
264
  ct_banned += 1
265
- print(f"User {user_id} is banned")
266
  continue
267
- required_keys_each_task = {
268
- "image_editing": ["source_prompt", "target_prompt", "instruct_prompt"],
269
- "t2i_generation": ["prompt"],
270
- "video_generation": ["prompt"]
271
- }
272
-
273
- model_a_inputs = row["states"][0].copy()
274
- # pop conv_id and model_name
275
- model_a_inputs.pop("conv_id")
276
- model_a_inputs.pop("model_name")
277
- model_b_inputs = row["states"][1].copy()
278
- model_b_inputs.pop("conv_id")
279
- model_b_inputs.pop("model_name")
280
- for key in model_a_inputs:
281
- if not (key in model_b_inputs and model_a_inputs[key] == model_b_inputs[key]):
282
- print(f"Inconsistent inputs: {model_a_inputs} vs {model_b_inputs}")
283
- ct_invalid += 1
284
- continue
285
- if mode == "conv_release":
286
- if any(key not in model_a_inputs for key in required_keys_each_task[task_name]):
287
- print(f"Missing required keys: {model_a_inputs}, {required_keys_each_task[task_name]}")
288
- ct_invalid += 1
289
- continue
290
-
291
- inputs = model_a_inputs
292
  # Save the results
293
  battles.append(
294
  dict(
295
- model_a_conv_id=row["states"][0]["conv_id"],
296
- model_b_conv_id=row["states"][1]["conv_id"],
297
- inputs=inputs,
298
  model_a=models[0],
299
  model_b=models[1],
300
- vote_type=row["type"],
301
  winner=convert_type[row["type"]],
302
  judge=f"arena_user_{user_id}",
 
 
 
303
  anony=anony,
 
304
  tstamp=row["tstamp"],
305
  )
306
  )
@@ -337,7 +332,7 @@ if __name__ == "__main__":
337
  parser.add_argument(
338
  "--mode", type=str, choices=["simple", "conv_release"], default="simple"
339
  )
340
- parser.add_argument("--task_name", type=str, default="image_editing", choices=["image_editing", "t2i_generation", "video_generation"])
341
  parser.add_argument("--exclude-model-names", type=str, nargs="+")
342
  parser.add_argument("--ban-ip-file", type=str)
343
  parser.add_argument("--sanitize-ip", action="store_true", default=False)
@@ -355,19 +350,27 @@ if __name__ == "__main__":
355
  ).strftime("%Y%m%d")
356
 
357
  if args.mode == "simple":
358
- # for x in battles:
359
- # for key in [
360
- # "conversation_a",
361
- # "conversation_b",
362
- # "question_id",
363
- # ]:
364
- # if key in x:
365
- # del x[key]
366
  print("Samples:")
367
  for i in range(min(4, len(battles))):
368
  print(battles[i])
369
  output = f"clean_battle_{args.task_name}_{cutoff_date}.json"
370
  elif args.mode == "conv_release":
 
 
 
 
 
 
 
 
371
  output = f"clean_battle_{args.task_name}_conv_{cutoff_date}.json"
372
 
373
  with open(output, "w") as fout:
 
18
  from tqdm import tqdm
19
 
20
  from .basic_stats import get_log_files, NUM_SERVERS, LOG_ROOT_DIR
21
+ from .utils import detect_language, get_time_stamp_from_date
22
 
23
  VOTES = ["tievote", "leftvote", "rightvote", "bothbad_vote"]
24
+ IDENTITY_WORDS = [
25
+ "vicuna",
26
+ "lmsys",
27
+ "koala",
28
+ "uc berkeley",
29
+ "open assistant",
30
+ "laion",
31
+ "chatglm",
32
+ "chatgpt",
33
+ "gpt-4",
34
+ "openai",
35
+ "anthropic",
36
+ "claude",
37
+ "bard",
38
+ "palm",
39
+ "lamda",
40
+ "google",
41
+ "llama",
42
+ "qianwan",
43
+ "alibaba",
44
+ "mistral",
45
+ "zhipu",
46
+ "KEG lab",
47
+ "01.AI",
48
+ "AI2",
49
+ "Tülu",
50
+ "Tulu",
51
+ "NETWORK ERROR DUE TO HIGH TRAFFIC. PLEASE REGENERATE OR REFRESH THIS PAGE.",
52
+ "$MODERATION$ YOUR INPUT VIOLATES OUR CONTENT MODERATION GUIDELINES.",
53
+ "API REQUEST ERROR. Please increase the number of max tokens.",
54
+ "**API REQUEST ERROR** Reason: The response was blocked.",
55
+ "**API REQUEST ERROR**",
56
+ ]
57
+
58
+ for i in range(len(IDENTITY_WORDS)):
59
+ IDENTITY_WORDS[i] = IDENTITY_WORDS[i].lower()
60
 
 
 
 
61
 
62
  def remove_html(raw):
63
  if raw.startswith("<h3>"):
 
77
 
78
  def replace_model_name(old_name, tstamp):
79
  replace_dict = {
80
+ "bard": "palm-2",
81
+ "claude-v1": "claude-1",
82
+ "claude-instant-v1": "claude-instant-1",
83
+ "oasst-sft-1-pythia-12b": "oasst-pythia-12b",
84
+ "claude-2": "claude-2.0",
85
  }
86
+ if old_name in ["gpt-4", "gpt-3.5-turbo"]:
87
+ if tstamp > 1687849200:
88
+ return old_name + "-0613"
89
+ else:
90
+ return old_name + "-0314"
91
  if old_name in replace_dict:
92
+ return replace_dict[old_name]
 
 
 
 
 
93
  return old_name
94
 
95
 
 
105
  break
106
  except FileNotFoundError:
107
  time.sleep(2)
 
 
 
 
108
  return data
109
 
110
 
111
  def read_file_parallel(log_files, num_threads=16):
112
  data_all = []
113
+ from multiprocessing import Pool
114
+
115
+ with Pool(num_threads) as p:
116
+ ret_all = list(tqdm(p.imap(read_file, log_files), total=len(log_files)))
117
+ for ret in ret_all:
118
+ data_all.extend(ret)
119
+ return data_all
 
 
 
 
 
120
 
121
  def load_image(image_path):
122
  try:
 
127
  def clean_battle_data(
128
  log_files, exclude_model_names, ban_ip_list=None, sanitize_ip=False, mode="simple", task_name="image_editing"
129
  ):
130
+ data = read_file_parallel(log_files, num_threads=16)
131
 
132
  convert_type = {
133
  "leftvote": "model_a",
 
145
  battles = []
146
  for row in tqdm(data, desc="Cleaning"):
147
  if row["models"][0] is None or row["models"][1] is None:
 
148
  continue
149
 
150
  # Resolve model names
 
163
  models_public[1] == "" and models_public[0] != ""
164
  ):
165
  ct_invalid += 1
 
166
  continue
167
 
168
  if models_public[0] == "" or models_public[0] == "Model A":
 
173
  anony = False
174
  models = models_public
175
  if not models_public == models_hidden:
 
176
  ct_invalid += 1
177
  continue
178
+
179
+ # # Detect langauge
180
+ # state = row["states"][0]
181
+ # if state["offset"] >= len(state["messages"]):
182
+ # ct_invalid += 1
183
+ # continue
184
+ # lang_code = detect_language(state["messages"][state["offset"]][1])
185
+
186
+ # # Drop conversations if the model names are leaked
187
+ # leaked_identity = False
188
+ # messages = ""
189
+ # for i in range(2):
190
+ # state = row["states"][i]
191
+ # for turn_idx, (role, msg) in enumerate(
192
+ # state["messages"][state["offset"] :]
193
+ # ):
194
+ # if msg:
195
+ # messages += msg.lower()
196
+ # for word in IDENTITY_WORDS:
197
+ # if word in messages:
198
+ # leaked_identity = True
199
+ # break
200
+
201
+ # if leaked_identity:
202
+ # ct_leaked_identity += 1
203
+ # continue
204
 
205
  # Replace bard with palm
206
+ models = [replace_model_name(m, row["tstamp"]) for m in models]
207
  if task_name == "image_editing":
208
+ if not all(x.startswith("imagenhub_") and x.endswith("_edition") for x in models):
209
+ # print(f"Invalid model names: {models}")
 
 
 
 
 
 
 
 
 
210
  ct_invalid += 1
211
  continue
212
+ models = [x[len("imagenhub_"):-len("_edition")] for x in models]
 
 
 
213
  elif task_name == "t2i_generation":
214
+ if not all("playground" in x.lower() or (x.startswith("imagenhub_") and x.endswith("_generation")) for x in models):
215
+ # print(f"Invalid model names: {models}")
 
 
 
 
 
 
 
 
 
216
  ct_invalid += 1
217
  continue
218
+ # models = [x[len("imagenhub_"):-len("_generation")] for x in models]
219
+ for i, model_name in enumerate(models):
220
+ if model_name.startswith("imagenhub_"):
221
+ models[i] = model_name[len("imagenhub_"):-len("_generation")]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
 
223
  else:
224
  raise ValueError(f"Invalid task_name: {task_name}")
225
 
 
 
226
  # Exclude certain models
227
  if exclude_model_names and any(x in exclude_model_names for x in models):
228
  ct_invalid += 1
229
  continue
230
+
231
+ # if models[0] not in model_infos or models[1] not in model_infos:
232
+ # continue
233
+
234
+ # # Exclude votes before the starting date
235
+ # if model_infos and (model_infos[models[0]]["starting_from"] > row["tstamp"] or model_infos[models[1]]["starting_from"] > row["tstamp"]):
236
+ # print(f"Invalid vote before the valid starting date for {models[0]} and {models[1]}")
237
+ # ct_invalid += 1
238
+ # continue
239
+
240
+
241
 
242
  if mode == "conv_release":
243
  # assert the two images are the same
 
262
  continue
263
 
264
 
265
+ question_id = row["states"][0]["conv_id"]
266
+ # conversation_a = to_openai_format(
267
+ # row["states"][0]["messages"][row["states"][0]["offset"] :]
268
+ # )
269
+ # conversation_b = to_openai_format(
270
+ # row["states"][1]["messages"][row["states"][1]["offset"] :]
271
+ # )
272
+
273
  ip = row["ip"]
274
  if ip not in all_ips:
275
  all_ips[ip] = {"ip": ip, "count": 0, "sanitized_id": len(all_ips)}
 
281
 
282
  if ban_ip_list is not None and ip in ban_ip_list:
283
  ct_banned += 1
 
284
  continue
285
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  # Save the results
287
  battles.append(
288
  dict(
289
+ question_id=question_id,
 
 
290
  model_a=models[0],
291
  model_b=models[1],
 
292
  winner=convert_type[row["type"]],
293
  judge=f"arena_user_{user_id}",
294
+ # conversation_a=conversation_a,
295
+ # conversation_b=conversation_b,
296
+ # turn=len(conversation_a) // 2,
297
  anony=anony,
298
+ # language=lang_code,
299
  tstamp=row["tstamp"],
300
  )
301
  )
 
332
  parser.add_argument(
333
  "--mode", type=str, choices=["simple", "conv_release"], default="simple"
334
  )
335
+ parser.add_argument("--task_name", type=str, default="image_editing", choices=["image_editing", "t2i_generation"])
336
  parser.add_argument("--exclude-model-names", type=str, nargs="+")
337
  parser.add_argument("--ban-ip-file", type=str)
338
  parser.add_argument("--sanitize-ip", action="store_true", default=False)
 
350
  ).strftime("%Y%m%d")
351
 
352
  if args.mode == "simple":
353
+ for x in battles:
354
+ for key in [
355
+ "conversation_a",
356
+ "conversation_b",
357
+ "question_id",
358
+ ]:
359
+ if key in x:
360
+ del x[key]
361
  print("Samples:")
362
  for i in range(min(4, len(battles))):
363
  print(battles[i])
364
  output = f"clean_battle_{args.task_name}_{cutoff_date}.json"
365
  elif args.mode == "conv_release":
366
+ # new_battles = []
367
+ # for x in battles:
368
+ # if not x["anony"]:
369
+ # continue
370
+ # for key in []:
371
+ # del x[key]
372
+ # new_battles.append(x)
373
+ # battles = new_battles
374
  output = f"clean_battle_{args.task_name}_conv_{cutoff_date}.json"
375
 
376
  with open(output, "w") as fout:
arena_elo/elo_rating/elo_analysis.py CHANGED
@@ -11,9 +11,9 @@ import pandas as pd
11
  import plotly.express as px
12
  from tqdm import tqdm
13
 
 
14
  from .basic_stats import get_log_files
15
  from .clean_battle_data import clean_battle_data
16
- from .utils import get_model_info
17
 
18
  pd.options.display.float_format = "{:.2f}".format
19
 
@@ -214,9 +214,8 @@ def visualize_average_win_rate(battles, limit_show_number):
214
  width=700,
215
  )
216
  fig.update_layout(
217
- yaxis_title="Average Win Rate", xaxis_title="Model", showlegend=False,
218
  )
219
- fig.update_traces(textfont_size=16)
220
  return fig
221
 
222
 
@@ -247,7 +246,6 @@ def visualize_bootstrap_elo_rating(df, df_final, limit_show_number):
247
  width=700,
248
  )
249
  fig.update_layout(xaxis_title="Model", yaxis_title="Rating")
250
- fig.update_traces(textfont_size=16)
251
  return fig
252
 
253
 
@@ -340,7 +338,6 @@ if __name__ == "__main__":
340
  "--rating-system", type=str, choices=["bt", "elo"], default="bt"
341
  )
342
  parser.add_argument("--exclude-tie", action="store_true", default=False)
343
- parser.add_argument("--min_num_battles_per_model", type=int, default=25)
344
  args = parser.parse_args()
345
 
346
  np.random.seed(42)
@@ -352,23 +349,7 @@ if __name__ == "__main__":
352
  # Read data from all log files
353
  log_files = get_log_files(args.max_num_files)
354
  battles = clean_battle_data(log_files)
355
-
356
- if args.min_num_battles_per_model:
357
- num_battles_per_model = defaultdict(int)
358
- # use pd
359
- for _, battle in battles.iterrows():
360
- num_battles_per_model[battle["model_a"]] += 1
361
- num_battles_per_model[battle["model_b"]] += 1
362
- to_remove_models = [
363
- model for model, num_battles in num_battles_per_model.items() if num_battles < args.min_num_battles_per_model
364
- ]
365
- battles_with_enough_battles = battles[
366
- ~battles["model_a"].isin(to_remove_models) & ~battles["model_b"].isin(to_remove_models)
367
- ]
368
- print(f"Remove models with less than {args.min_num_battles_per_model} battles: {to_remove_models}")
369
- print(f"Number of battles: {len(battles)} -> {len(battles_with_enough_battles)}")
370
- battles = battles_with_enough_battles
371
-
372
  anony_results = report_elo_analysis_results(
373
  battles, rating_system=args.rating_system, num_bootstrap=args.num_bootstrap, anony_only=True
374
  )
@@ -381,22 +362,9 @@ if __name__ == "__main__":
381
  pretty_print_elo_rating(anony_results["elo_rating_online"])
382
  print("# Median")
383
  pretty_print_elo_rating(anony_results["elo_rating_final"])
384
- print(f"Annoy last update : {anony_results['last_updated_datetime']}")
385
- print(f"Full last update : {full_results['last_updated_datetime']}")
386
-
387
-
388
- # # save heatmap results in the same directory of the cleaned battle file
389
- win_fraction_heatmap_file = args.clean_battle_file.replace(".json", "_win_fraction_heatmap.jpg")
390
- battle_count_heatmap_file = args.clean_battle_file.replace(".json", "_battle_count_heatmap.jpg")
391
- average_win_rate_bar_file = args.clean_battle_file.replace(".json", "_average_win_rate_bar.jpg")
392
- bootstrap_elo_rating_file = args.clean_battle_file.replace(".json", "_bootstrap_elo_rating.jpg")
393
- anony_results["win_fraction_heatmap"].write_image(win_fraction_heatmap_file)
394
- anony_results["battle_count_heatmap"].write_image(battle_count_heatmap_file)
395
- anony_results["average_win_rate_bar"].write_image(average_win_rate_bar_file)
396
- anony_results["bootstrap_elo_rating"].write_image(bootstrap_elo_rating_file)
397
-
398
 
399
- last_updated_tstamp = full_results["last_updated_tstamp"]
400
  cutoff_date = datetime.datetime.fromtimestamp(
401
  last_updated_tstamp, tz=timezone("US/Pacific")
402
  ).strftime("%Y%m%d")
@@ -408,6 +376,3 @@ if __name__ == "__main__":
408
  }
409
  with open(f"elo_results_{cutoff_date}.pkl", "wb") as fout:
410
  pickle.dump(results, fout)
411
-
412
- with open("cut_off_date.txt", "w") as fout:
413
- fout.write(cutoff_date)
 
11
  import plotly.express as px
12
  from tqdm import tqdm
13
 
14
+ from .model_registry import get_model_info
15
  from .basic_stats import get_log_files
16
  from .clean_battle_data import clean_battle_data
 
17
 
18
  pd.options.display.float_format = "{:.2f}".format
19
 
 
214
  width=700,
215
  )
216
  fig.update_layout(
217
+ yaxis_title="Average Win Rate", xaxis_title="Model", showlegend=False
218
  )
 
219
  return fig
220
 
221
 
 
246
  width=700,
247
  )
248
  fig.update_layout(xaxis_title="Model", yaxis_title="Rating")
 
249
  return fig
250
 
251
 
 
338
  "--rating-system", type=str, choices=["bt", "elo"], default="bt"
339
  )
340
  parser.add_argument("--exclude-tie", action="store_true", default=False)
 
341
  args = parser.parse_args()
342
 
343
  np.random.seed(42)
 
349
  # Read data from all log files
350
  log_files = get_log_files(args.max_num_files)
351
  battles = clean_battle_data(log_files)
352
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
  anony_results = report_elo_analysis_results(
354
  battles, rating_system=args.rating_system, num_bootstrap=args.num_bootstrap, anony_only=True
355
  )
 
362
  pretty_print_elo_rating(anony_results["elo_rating_online"])
363
  print("# Median")
364
  pretty_print_elo_rating(anony_results["elo_rating_final"])
365
+ print(f"last update : {anony_results['last_updated_datetime']}")
 
 
 
 
 
 
 
 
 
 
 
 
 
366
 
367
+ last_updated_tstamp = anony_results["last_updated_tstamp"]
368
  cutoff_date = datetime.datetime.fromtimestamp(
369
  last_updated_tstamp, tz=timezone("US/Pacific")
370
  ).strftime("%Y%m%d")
 
376
  }
377
  with open(f"elo_results_{cutoff_date}.pkl", "wb") as fout:
378
  pickle.dump(results, fout)
 
 
 
arena_elo/elo_rating/generate_leaderboard.py CHANGED
@@ -2,12 +2,15 @@ import fire
2
  import json
3
  import pandas as pd
4
  import pickle
5
- from .utils import get_model_info
6
 
7
  def main(
8
- elo_rating_pkl: str,
9
- output_csv: str
 
10
  ):
 
 
11
  with open(elo_rating_pkl, "rb") as fin:
12
  elo_rating_results = pickle.load(fin)
13
 
@@ -16,23 +19,19 @@ def main(
16
  anony_leaderboard_data = anony_elo_rating_results["leaderboard_table_df"]
17
  full_leaderboard_data = full_elo_rating_results["leaderboard_table_df"]
18
 
19
- print(anony_leaderboard_data)
20
  # Model,MT-bench (score),Arena Elo rating,MMLU,License,Link
21
  fields = ["key", "Model", "Arena Elo rating (anony)", "Arena Elo rating (full)", "License", "Organization", "Link"]
22
  # set Organization and license to empty for now
23
  all_models = anony_leaderboard_data.index.tolist()
24
 
25
- model_info = {}
26
  for model in all_models:
27
-
28
- registered_model_info = get_model_info(model)
29
- model_info[model] = {
30
- "key": model,
31
- "Model": model,
32
- "License": registered_model_info.license,
33
- "Organization": registered_model_info.organization,
34
- "Link": registered_model_info.link
35
- }
36
 
37
  if model in anony_leaderboard_data.index:
38
  model_info[model]["Arena Elo rating (anony)"] = anony_leaderboard_data.loc[model, "rating"]
@@ -43,6 +42,10 @@ def main(
43
  model_info[model]["Arena Elo rating (full)"] = full_leaderboard_data.loc[model, "rating"]
44
  else:
45
  model_info[model]["Arena Elo rating (full)"] = 0
 
 
 
 
46
 
47
  final_model_info = {}
48
  for model in model_info:
 
2
  import json
3
  import pandas as pd
4
  import pickle
5
+
6
 
7
  def main(
8
+ model_info_file: str,
9
+ elo_rating_pkl: str,
10
+ output_csv: str
11
  ):
12
+ model_info = json.load(open(model_info_file))
13
+
14
  with open(elo_rating_pkl, "rb") as fin:
15
  elo_rating_results = pickle.load(fin)
16
 
 
19
  anony_leaderboard_data = anony_elo_rating_results["leaderboard_table_df"]
20
  full_leaderboard_data = full_elo_rating_results["leaderboard_table_df"]
21
 
 
22
  # Model,MT-bench (score),Arena Elo rating,MMLU,License,Link
23
  fields = ["key", "Model", "Arena Elo rating (anony)", "Arena Elo rating (full)", "License", "Organization", "Link"]
24
  # set Organization and license to empty for now
25
  all_models = anony_leaderboard_data.index.tolist()
26
 
 
27
  for model in all_models:
28
+ if not model in model_info:
29
+ model_info[model] = {}
30
+ model_info[model]["License"] = "N/A"
31
+ model_info[model]["Organization"] = "N/A"
32
+ model_info[model]["Link"] = "N/A"
33
+ model_info[model]["Model"] = model
34
+ model_info[model]["key"] = model
 
 
35
 
36
  if model in anony_leaderboard_data.index:
37
  model_info[model]["Arena Elo rating (anony)"] = anony_leaderboard_data.loc[model, "rating"]
 
42
  model_info[model]["Arena Elo rating (full)"] = full_leaderboard_data.loc[model, "rating"]
43
  else:
44
  model_info[model]["Arena Elo rating (full)"] = 0
45
+ # if model in anony_leaderboard_data.index:
46
+ # model_info[model]["Arena Elo rating"] = anony_leaderboard_data.loc[model, "rating"]
47
+ # else:
48
+ # model_info[model]["Arena Elo rating"] = 0
49
 
50
  final_model_info = {}
51
  for model in model_info:
arena_elo/elo_rating/model_registry.py ADDED
@@ -0,0 +1,578 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Additional information of the models."""
2
+ from collections import namedtuple, OrderedDict
3
+ from typing import List
4
+
5
+
6
# Immutable record holding the display metadata of one model family.
ModelInfo = namedtuple("ModelInfo", ["simple_name", "link", "description"])


# Maps each registered full model name to its ModelInfo record.
# Insertion order is the order in which the names were registered.
model_info = OrderedDict()


def register_model_info(
    full_names: List[str], simple_name: str, link: str, description: str
) -> None:
    """Register display metadata for one or more model names.

    A single ``ModelInfo`` record is built and stored under every name in
    ``full_names``; registering a name again overwrites its previous entry.

    Args:
        full_names: Full model names that share this metadata. A bare string
            is accepted and treated as a one-element list.
        simple_name: Short display name of the model family.
        link: URL with more information about the model ("" if none).
        description: One-line description of the model.
    """
    # Guard against the classic mistake of passing "name" instead of
    # ["name"]: iterating a str would register one key per character.
    if isinstance(full_names, str):
        full_names = [full_names]

    info = ModelInfo(simple_name, link, description)
    for full_name in full_names:
        model_info[full_name] = info


def get_model_info(name: str) -> ModelInfo:
    """Return the metadata registered for ``name``.

    Unknown names do not raise and are not added to the registry. Instead a
    placeholder record is returned whose ``simple_name`` is ``name`` itself,
    whose ``link`` is empty, and whose ``description`` tells the maintainer
    where to register the model.

    Args:
        name: Full model name to look up.

    Returns:
        The registered ``ModelInfo``, or a placeholder for unknown names.
    """
    try:
        return model_info[name]
    except KeyError:
        # To fix this, please use `register_model_info` to register your model
        return ModelInfo(
            name,
            "",
            "Register the description at arena_elo/elo_rating/model_registry.py",
        )
29
+
30
+
31
# Built-in registrations. Each call maps one or more full model names to a
# shared (simple name, link, description) record. Order is significant only
# in that it determines the iteration order of `model_info`.

register_model_info(
    [
        "IEITYuan/Yuan2-2B-Janus-hf",
        "IEITYuan/Yuan2-2B-hf",
        "IEITYuan/Yuan2-51B-hf",
        "IEITYuan/Yuan2-102B-hf",
    ],
    "IEIT-Yuan2",
    "https://github.com/IEIT-Yuan/Yuan-2.0",
    "Yuan2.0 is a new generation Fundamental Large Language Model developed by IEIT System.",
)

register_model_info(
    ["mixtral-8x7b-instruct-v0.1", "mistral-7b-instruct"],
    "Mixtral of experts",
    "https://mistral.ai/news/mixtral-of-experts/",
    "A Mixture-of-Experts model by Mistral AI",
)

# The text and vision variants share identical metadata, so they are
# registered together.
register_model_info(
    ["gemini-pro", "gemini-pro-vision"],
    "Gemini",
    "https://blog.google/technology/ai/google-gemini-pro-imagen-duet-ai-update/",
    "Gemini by Google",
)

register_model_info(
    ["solar-10.7b-instruct-v1.0"],
    "SOLAR-10.7B-Instruct",
    "https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0",
    "A model trained using depth up-scaling by Upstage AI",
)

register_model_info(
    ["gpt-4-turbo"],
    "GPT-4-Turbo",
    "https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo",
    "GPT-4-Turbo by OpenAI",
)

register_model_info(
    ["gpt-4-vision-preview"],
    "gpt-4-vision-preview",
    "https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo",
    "GPT-4(V) by OpenAI",
)

register_model_info(
    ["gpt-3.5-turbo", "gpt-3.5-turbo-0314", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-1106"],
    "GPT-3.5",
    "https://platform.openai.com/docs/models/gpt-3-5",
    "GPT-3.5-Turbo by OpenAI",
)

register_model_info(
    ["gpt-4", "gpt-4-0314", "gpt-4-0613"],
    "GPT-4",
    "https://openai.com/research/gpt-4",
    "GPT-4 by OpenAI",
)

register_model_info(
    ["claude-2.1", "claude-2.0"],
    "Claude",
    "https://www.anthropic.com/index/claude-2",
    "Claude 2 by Anthropic",
)

register_model_info(
    ["claude-1"],
    "Claude",
    "https://www.anthropic.com/index/introducing-claude",
    "Claude 1 by Anthropic",
)

register_model_info(
    ["claude-instant-1", "claude-instant-1.2"],
    "Claude Instant",
    "https://www.anthropic.com/index/introducing-claude",
    "Claude Instant by Anthropic",
)

register_model_info(
    ["pplx-70b-online", "pplx-7b-online"],
    "pplx-online-llms",
    "https://blog.perplexity.ai/blog/introducing-pplx-online-llms",
    "Online LLM API by Perplexity AI",
)

register_model_info(
    ["openhermes-2.5-mistral-7b"],
    "OpenHermes-2.5-Mistral-7B",
    "https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B",
    "a mistral-based model fine-tuned on 1M GPT-4 outputs",
)

register_model_info(
    ["starling-lm-7b-alpha"],
    "Starling-LM-7B-alpha",
    "https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha",
    "an open model trained using RLAIF by Berkeley",
)

register_model_info(
    ["tulu-2-dpo-70b"],
    "Tulu 2",
    "https://huggingface.co/allenai/tulu-2-dpo-70b",
    "an instruction and RLHF model by UW/AllenAI",
)

register_model_info(
    ["yi-34b-chat", "yi-6b-chat"],
    "Yi-Chat",
    "https://huggingface.co/01-ai/Yi-34B-Chat",
    "A large language model by 01 AI",
)

register_model_info(
    ["llama-2-70b-chat", "llama-2-34b-chat", "llama-2-13b-chat", "llama-2-7b-chat"],
    "Llama 2",
    "https://ai.meta.com/llama/",
    "open foundation and fine-tuned chat models by Meta",
)

register_model_info(
    [
        "vicuna-33b",
        "vicuna-33b-v1.3",
        "vicuna-13b",
        "vicuna-13b-v1.3",
        "vicuna-7b",
        "vicuna-7b-v1.3",
    ],
    "Vicuna",
    "https://lmsys.org/blog/2023-03-30-vicuna/",
    "a chat assistant fine-tuned on user-shared conversations by LMSYS",
)

register_model_info(
    ["chatglm3-6b", "chatglm2-6b", "chatglm-6b"],
    "ChatGLM",
    "https://chatglm.cn/blog",
    "an open bilingual dialogue language model by Tsinghua University",
)

register_model_info(
    ["openchat-3.5"],
    "OpenChat 3.5",
    "https://github.com/imoneoi/openchat",
    "an open model fine-tuned on Mistral-7B using C-RLFT",
)

register_model_info(
    ["tenyxchat-7b-v1"],
    "TenyxChat-7B",
    "https://huggingface.co/tenyx/TenyxChat-7B-v1",
    "an open model DPO trained on top of OpenChat-3.5 using Tenyx fine-tuning",
)

register_model_info(
    ["zephyr-7b-beta", "zephyr-7b-alpha"],
    "Zephyr",
    "https://huggingface.co/HuggingFaceH4/zephyr-7b-alpha",
    "a chatbot fine-tuned from Mistral by Hugging Face",
)

register_model_info(
    ["notus-7b-v1"],
    "Notus",
    "https://huggingface.co/argilla/notus-7b-v1",
    "a chatbot fine-tuned from Zephyr SFT by Argilla",
)

register_model_info(
    ["catppt"],
    "CatPPT",
    "https://huggingface.co/rishiraj/CatPPT",
    "a chatbot fine-tuned from a SLERP merged model by Rishiraj Acharya",
)

register_model_info(
    ["TinyLlama"],
    "TinyLlama",
    "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "The TinyLlama project is an open endeavor to pretrain a 1.1B Llama model on 3 trillion tokens.",
)

register_model_info(
    ["qwen-14b-chat"],
    "Qwen",
    "https://huggingface.co/Qwen/Qwen-14B-Chat",
    "a large language model by Alibaba Cloud",
)

register_model_info(
    ["codellama-34b-instruct", "codellama-13b-instruct", "codellama-7b-instruct"],
    "Code Llama",
    "https://ai.meta.com/blog/code-llama-large-language-model-coding/",
    "open foundation models for code by Meta",
)

register_model_info(
    ["wizardlm-70b", "wizardlm-30b", "wizardlm-13b"],
    "WizardLM",
    "https://github.com/nlpxucan/WizardLM",
    "an instruction-following LLM using evol-instruct by Microsoft",
)

register_model_info(
    ["wizardcoder-15b-v1.0"],
    "WizardLM",
    "https://github.com/nlpxucan/WizardLM/tree/main/WizardCoder",
    "Empowering Code Large Language Models with Evol-Instruct",
)

register_model_info(
    ["mpt-7b-chat", "mpt-30b-chat"],
    "MPT-Chat",
    "https://www.mosaicml.com/blog/mpt-30b",
    "a chatbot fine-tuned from MPT by MosaicML",
)

register_model_info(
    ["guanaco-33b", "guanaco-65b"],
    "Guanaco",
    "https://github.com/artidoro/qlora",
    "a model fine-tuned with QLoRA by UW",
)

register_model_info(
    ["gpt4all-13b-snoozy"],
    "GPT4All-Snoozy",
    "https://github.com/nomic-ai/gpt4all",
    "a finetuned LLaMA model on assistant style data by Nomic AI",
)

register_model_info(
    ["koala-13b"],
    "Koala",
    "https://bair.berkeley.edu/blog/2023/04/03/koala",
    "a dialogue model for academic research by BAIR",
)

register_model_info(
    ["RWKV-4-Raven-14B"],
    "RWKV-4-Raven",
    "https://huggingface.co/BlinkDL/rwkv-4-raven",
    "an RNN with transformer-level LLM performance",
)

register_model_info(
    ["alpaca-13b"],
    "Alpaca",
    "https://crfm.stanford.edu/2023/03/13/alpaca.html",
    "a model fine-tuned from LLaMA on instruction-following demonstrations by Stanford",
)

# Both OpenAssistant checkpoints share identical metadata, so they are
# registered together.
register_model_info(
    ["oasst-pythia-12b", "oasst-sft-7-llama-30b"],
    "OpenAssistant (oasst)",
    "https://open-assistant.io",
    "an Open Assistant for everyone by LAION",
)

register_model_info(
    ["palm-2"],
    "PaLM 2 Chat",
    "https://cloud.google.com/vertex-ai/docs/release-notes#May_10_2023",
    "PaLM 2 for Chat (chat-bison@001) by Google",
)

register_model_info(
    ["llama-7b", "llama-13b"],
    "LLaMA",
    "https://arxiv.org/abs/2302.13971",
    "open and efficient foundation language models by Meta",
)

register_model_info(
    ["open-llama-7b-v2-open-instruct", "open-llama-7b-open-instruct"],
    "Open LLaMa (Open Instruct)",
    "https://medium.com/vmware-data-ml-blog/starter-llm-for-the-enterprise-instruction-tuning-openllama-7b-d05fc3bbaccc",
    "Open LLaMa fine-tuned on instruction-following data by VMware",
)

register_model_info(
    ["dolly-v2-12b"],
    "Dolly",
    "https://www.databricks.com/blog/2023/04/12/dolly-first-open-commercially-viable-instruction-tuned-llm",
    "an instruction-tuned open large language model by Databricks",
)

register_model_info(
    ["stablelm-tuned-alpha-7b"],
    "StableLM",
    "https://github.com/stability-AI/stableLM",
    "Stability AI language models",
)

register_model_info(
    ["codet5p-6b"],
    "CodeT5p-6b",
    "https://huggingface.co/Salesforce/codet5p-6b",
    "Code completion model released by Salesforce",
)

register_model_info(
    ["fastchat-t5-3b", "fastchat-t5-3b-v1.0"],
    "FastChat-T5",
    "https://huggingface.co/lmsys/fastchat-t5-3b-v1.0",
    "a chat assistant fine-tuned from FLAN-T5 by LMSYS",
)

register_model_info(
    ["phoenix-inst-chat-7b"],
    "Phoenix-7B",
    "https://huggingface.co/FreedomIntelligence/phoenix-inst-chat-7b",
    "a multilingual chat assistant fine-tuned from Bloomz to democratize ChatGPT across languages by CUHK(SZ)",
)

register_model_info(
    ["realm-7b-v1"],
    "ReaLM",
    "https://github.com/FreedomIntelligence/ReaLM",
    "A chatbot fine-tuned from LLaMA2 with data generated via iterative calls to UserGPT and ChatGPT by CUHK(SZ) and SRIBD.",
)

register_model_info(
    ["billa-7b-sft"],
    "BiLLa-7B-SFT",
    "https://huggingface.co/Neutralzz/BiLLa-7B-SFT",
    "an instruction-tuned bilingual LLaMA with enhanced reasoning ability by an independent researcher",
)

register_model_info(
    ["h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2"],
    "h2oGPT-GM-7b",
    "https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2",
    "an instruction-tuned OpenLLaMA with enhanced conversational ability by H2O.ai",
)

register_model_info(
    ["baize-v2-7b", "baize-v2-13b"],
    "Baize v2",
    "https://github.com/project-baize/baize-chatbot#v2",
    "A chatbot fine-tuned from LLaMA with ChatGPT self-chat data and Self-Distillation with Feedback (SDF) by UCSD and SYSU.",
)

register_model_info(
    [
        "airoboros-l2-7b-2.1",
        "airoboros-l2-13b-2.1",
        "airoboros-c34b-2.1",
        "airoboros-l2-70b-2.1",
    ],
    "airoboros",
    "https://huggingface.co/jondurbin/airoboros-l2-70b-2.1",
    "an instruction-tuned LlaMa model tuned with 100% synthetic instruction-response pairs from GPT4",
)

register_model_info(
    [
        "spicyboros-7b-2.2",
        "spicyboros-13b-2.2",
        "spicyboros-70b-2.2",
    ],
    "spicyboros",
    "https://huggingface.co/jondurbin/spicyboros-70b-2.2",
    "de-aligned versions of the airoboros models",
)

register_model_info(
    ["Robin-7b-v2", "Robin-13b-v2", "Robin-33b-v2"],
    "Robin-v2",
    "https://huggingface.co/OptimalScale/robin-7b-v2-delta",
    "A chatbot fine-tuned from LLaMA-7b, achieving competitive performance on chitchat, commonsense reasoning and instruction-following tasks, by OptimalScale, HKUST.",
)

register_model_info(
    ["manticore-13b-chat"],
    "Manticore 13B Chat",
    "https://huggingface.co/openaccess-ai-collective/manticore-13b-chat-pyg",
    "A chatbot fine-tuned from LlaMa across several CoT and chat datasets.",
)

register_model_info(
    ["redpajama-incite-7b-chat"],
    "RedPajama-INCITE-7B-Chat",
    "https://huggingface.co/togethercomputer/RedPajama-INCITE-7B-Chat",
    "A chatbot fine-tuned from RedPajama-INCITE-7B-Base by Together",
)

register_model_info(
    [
        "falcon-7b",
        "falcon-7b-instruct",
        "falcon-40b",
        "falcon-40b-instruct",
        "falcon-180b",
        "falcon-180b-chat",
    ],
    "Falcon",
    "https://huggingface.co/tiiuae/falcon-180B",
    "TII's flagship series of large language models",
)

register_model_info(
    ["tigerbot-7b-sft"],
    "Tigerbot",
    "https://huggingface.co/TigerResearch/tigerbot-7b-sft",
    "TigerBot is a large-scale language model (LLM) with multiple languages and tasks.",
)

register_model_info(
    ["internlm-chat-7b", "internlm-chat-7b-8k"],
    "InternLM",
    "https://huggingface.co/internlm/internlm-chat-7b",
    "InternLM is a multi-language large-scale language model (LLM), developed by SHLAB.",
)

register_model_info(
    ["Qwen-7B-Chat"],
    "Qwen",
    "https://huggingface.co/Qwen/Qwen-7B-Chat",
    "Qwen is a multi-language large-scale language model (LLM), developed by Damo Academy.",
)

register_model_info(
    ["Llama2-Chinese-13b-Chat", "LLama2-Chinese-13B"],
    "Llama2-Chinese",
    "https://huggingface.co/FlagAlpha/Llama2-Chinese-13b-Chat",
    "Llama2-Chinese is a multi-language large-scale language model (LLM), developed by FlagAlpha.",
)

register_model_info(
    ["Chinese-Alpaca-2-7B", "Chinese-Alpaca-2-13B"],
    "Chinese-Alpaca",
    "https://huggingface.co/hfl/chinese-alpaca-2-13b",
    "New extended Chinese vocabulary beyond Llama-2, open-sourcing the Chinese LLaMA-2 and Alpaca-2 LLMs.",
)

register_model_info(
    ["Vigogne-2-7B-Instruct", "Vigogne-2-13B-Instruct"],
    "Vigogne-Instruct",
    "https://huggingface.co/bofenghuang/vigogne-2-7b-instruct",
    "Vigogne-Instruct is a French large language model (LLM) optimized for instruction-following, developed by Bofeng Huang",
)

register_model_info(
    ["Vigogne-2-7B-Chat", "Vigogne-2-13B-Chat"],
    "Vigogne-Chat",
    "https://huggingface.co/bofenghuang/vigogne-2-7b-chat",
    "Vigogne-Chat is a French large language model (LLM) optimized for instruction-following and multi-turn dialogues, developed by Bofeng Huang",
)

register_model_info(
    ["stable-vicuna-13B-HF"],
    "stable-vicuna",
    "https://huggingface.co/TheBloke/stable-vicuna-13B-HF",
    "StableVicuna is a Vicuna model fine-tuned using RLHF via PPO on various conversational and instructional datasets.",
)

register_model_info(
    ["deluxe-chat-v1", "deluxe-chat-v1.1", "deluxe-chat-v1.2"],
    "DeluxeChat",
    "",
    "Deluxe Chat",
)

register_model_info(
    [
        "Xwin-LM-7B-V0.1",
        "Xwin-LM-13B-V0.1",
        "Xwin-LM-70B-V0.1",
        "Xwin-LM-7B-V0.2",
        "Xwin-LM-13B-V0.2",
    ],
    "Xwin-LM",
    "https://github.com/Xwin-LM/Xwin-LM",
    "Chat models developed by Xwin-LM team",
)

register_model_info(
    ["lemur-70b-chat"],
    "Lemur-Chat",
    "https://huggingface.co/OpenLemur/lemur-70b-chat-v1",
    "an openly accessible language model optimized for both natural language and coding capabilities",
)

register_model_info(
    ["Mistral-7B-OpenOrca"],
    "Open-Orca",
    "https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca",
    "A fine-tune of [Mistral 7B](https://huggingface.co/mistralai/Mistral-7B-v0.1) using [OpenOrca dataset](https://huggingface.co/datasets/Open-Orca/OpenOrca)",
)

register_model_info(
    ["dolphin-2.2.1-mistral-7b"],
    "dolphin-mistral",
    "https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b",
    "An uncensored fine-tuned Mistral 7B",
)

register_model_info(
    [
        "AquilaChat-7B",
        "AquilaChat2-7B",
        "AquilaChat2-34B",
    ],
    "Aquila-Chat",
    "https://huggingface.co/BAAI/AquilaChat2-34B",
    "Chat models developed by BAAI team",
)

register_model_info(
    ["xDAN-L1-Chat-RL-v1"],
    "xDAN-L1-Chat",
    "https://huggingface.co/xDAN-AI/xDAN-L1-Chat-RL-v1",
    "A large language chat model created by xDAN-AI.",
)

register_model_info(
    ["MetaMath-70B-V1.0", "MetaMath-7B-V1.0"],
    "MetaMath",
    "https://huggingface.co/meta-math",
    "MetaMath is a finetune of Llama2 on [MetaMathQA](https://huggingface.co/datasets/meta-math/MetaMathQA) that specializes in mathematical reasoning.",
)

register_model_info(
    ["Yuan2-2B-hf", "Yuan2-51B-hf", "Yuan2-102B-hf"],
    "IEIYuan",
    "https://huggingface.co/IEITYuan",
    "Yuan2 is a Basemodel developed by IEI.",
)
arena_elo/elo_rating/upload_battle_data.py CHANGED
@@ -2,36 +2,60 @@ import fire
2
  import json
3
  import os
4
  import datasets
5
- import random
6
  import datetime
7
  from pathlib import Path
8
  from datetime import datetime
9
  from PIL import Image
10
 
11
  datasets.config.DEFAULT_MAX_BATCH_SIZE = 500
12
-
13
- def create_hf_battle_dataset(data_file: str, split="test", task_type="t2i_generation"):
14
- if task_type == "t2i_generation":
15
- features = datasets.Features(
16
  {
17
- "index": datasets.Value("int32"),
18
- "tstamp": datasets.Value("int32"),
19
- "prompt": datasets.Value("string"),
20
- "left_model": datasets.Value("string"),
21
- "left_image": datasets.Image(),
22
- "right_model": datasets.Value("string"),
23
- "right_image": datasets.Image(),
24
- "vote_type": datasets.Value("string"),
25
- "winner": datasets.Value("string"),
26
- "anony": datasets.Value("bool"),
27
- "judge": datasets.Value("string"),
28
  }
29
- )
30
- else:
31
- raise ValueError(f"Task type {task_type} not supported")
 
 
 
32
  hf_dataset = datasets.Dataset.from_list(
33
  data_file,
34
- features=features,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  split=split,
36
  )
37
  return hf_dataset
@@ -57,105 +81,106 @@ def get_date_from_time_stamp(unix_timestamp: int):
57
  def load_battle_image(battle, log_dir):
58
  image_path = Path(log_dir) / f"{get_date_from_time_stamp(battle['tstamp'])}-convinput_images" / f"input_image_{battle['question_id']}.png"
59
  return load_image(image_path)
60
-
61
- def find_media_path(conv_id, task_type, log_dir):
62
- media_directory_map = {
63
- "t2i_generation": "images/generation",
64
- "image_edition": "images/edition",
65
- "text2video": "videos/generation"
66
- }
67
- if task_type == "t2i_generation":
68
- media_path = Path(log_dir) / media_directory_map[task_type] / f"{conv_id}.jpg"
69
- else:
70
- raise ValueError(f"Task type {task_type} not supported")
71
- return media_path
72
 
73
 
74
  def main(
75
- task_type='t2i_generation',
76
- # data_file: str = "./results/latest/clean_battle_conv.json",
77
- data_file: str = None,
78
- repo_id: str = "TIGER-Lab/GenAI-Arena-human-eval",
79
- log_dir: str = os.getenv("LOGDIR", "../GenAI-Arena-hf-logs/vote_log"),
80
- config_name='battle',
81
- split='test',
82
- token = os.environ.get("HUGGINGFACE_TOKEN", None),
83
- seed=42,
84
  ):
85
- if data_file is None:
86
- data_file = f"./results/latest/clean_battle_{task_type}.json"
87
- if not os.path.exists(data_file):
88
- raise ValueError(f"Data file {data_file} does not exist")
89
  with open(data_file, "r") as f:
90
  data = json.load(f)
91
 
92
- # add index according to the tsamp
93
- if seed is not None:
94
- random.seed(seed)
95
 
96
 
97
- data = sorted(data, key=lambda x: x['tstamp'])
98
- required_keys_each_task = {
99
- "image_editing": ["source_prompt", "target_prompt", "instruct_prompt"],
100
- "t2i_generation": ["prompt"],
101
- "video_generation": ["prompt"]
102
  }
103
- valid_data = []
104
- for i, battle in enumerate(data):
105
- if any(key not in battle['inputs'] for key in required_keys_each_task[task_type]):
106
- # print(battle['inputs'])
107
- # print(f"Skipping battle {i} due to missing keys")
108
- continue
109
- valid_data.append(battle)
110
- print(f"Total battles: {len(data)}, valid battles: {len(valid_data)}, removed battles: {len(data) - len(valid_data)}")
111
- data = valid_data
112
-
113
- # data = random.sample(data, 50 * 7+2)
114
-
115
- for i, battle in enumerate(data):
116
- battle['index'] = i
117
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
- new_data = []
120
- if task_type == 't2i_generation':
 
 
 
 
 
 
 
 
 
 
 
 
121
  for battle in data:
122
- prompt = battle['inputs']['prompt']
123
- model_a = battle['model_a']
124
- model_b = battle['model_b']
125
- model_a_conv_id = battle['model_a_conv_id']
126
- model_b_conv_id = battle['model_b_conv_id']
127
- tstamp = battle['tstamp']
128
- vote_type = battle['vote_type']
129
- left_image_path = find_media_path(model_a_conv_id, task_type, log_dir)
130
- right_image_path = find_media_path(model_b_conv_id, task_type, log_dir)
131
- left_image = load_image(left_image_path)
132
- right_image = load_image(right_image_path)
133
- if left_image is None or right_image is None:
134
- print(f"Skipping battle {battle['index']} due to missing images")
135
  continue
 
136
  new_data.append({
137
- "index": battle['index'],
138
- "tstamp": tstamp,
139
- "prompt": prompt,
140
- "left_model": model_a,
141
- "left_image": left_image,
142
- "right_model": model_b,
143
- "right_image": right_image,
144
- "vote_type": vote_type,
145
- "winner": battle['winner'],
146
- "anony": battle['anony'],
147
- "judge": battle['judge'],
148
  })
149
  split = "test"
150
- hf_dataset = create_hf_battle_dataset(new_data, split, task_type)
151
  else:
152
- raise ValueError(f"Task type {task_type} not supported")
153
 
 
154
  print(hf_dataset)
155
  print(f"Uploading to part {repo_id}:{split}...")
156
  hf_dataset.push_to_hub(
157
  repo_id=repo_id,
158
- config_name=config_name,
159
  split=split,
160
  token=token,
161
  commit_message=f"Add vision-arena {split} dataset",
 
2
  import json
3
  import os
4
  import datasets
 
5
  import datetime
6
  from pathlib import Path
7
  from datetime import datetime
8
  from PIL import Image
9
 
10
  datasets.config.DEFAULT_MAX_BATCH_SIZE = 500
11
+ def create_hf_dataset(data_file: str, split="test"):
12
+ hf_dataset = datasets.Dataset.from_list(
13
+ data_file,
14
+ features=datasets.Features(
15
  {
16
+ "question_id": datasets.Value("string"),
17
+ "model": datasets.Value("string"),
18
+ "conversation": [
19
+ {
20
+ "role": datasets.Value("string"),
21
+ "content": datasets.Value("string"),
22
+ }
23
+ ],
24
+ "language": datasets.Value("string"),
25
+ "image": datasets.Image(),
26
+ "turn": datasets.Value("int32"),
27
  }
28
+ ),
29
+ split=split,
30
+ )
31
+ return hf_dataset
32
+
33
+ def create_hf_battle_dataset(data_file: str, split="test"):
34
  hf_dataset = datasets.Dataset.from_list(
35
  data_file,
36
+ features=datasets.Features(
37
+ {
38
+ "question_id": datasets.Value("string"),
39
+ "model_a": datasets.Value("string"),
40
+ "model_b": datasets.Value("string"),
41
+ "conversation_a": [
42
+ {
43
+ "role": datasets.Value("string"),
44
+ "content": datasets.Value("string"),
45
+ }
46
+ ],
47
+ "conversation_b": [
48
+ {
49
+ "role": datasets.Value("string"),
50
+ "content": datasets.Value("string"),
51
+ }
52
+ ],
53
+ "language": datasets.Value("string"),
54
+ "image": datasets.Image(),
55
+ "turn": datasets.Value("int32"),
56
+ "anony": datasets.Value("bool"),
57
+ }
58
+ ),
59
  split=split,
60
  )
61
  return hf_dataset
 
81
  def load_battle_image(battle, log_dir):
82
  image_path = Path(log_dir) / f"{get_date_from_time_stamp(battle['tstamp'])}-convinput_images" / f"input_image_{battle['question_id']}.png"
83
  return load_image(image_path)
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
 
86
  def main(
87
+ data_file: str = "./results/latest/clean_battle_conv.json",
88
+ repo_id: str = "DongfuTingle/wildvision-bench",
89
+ log_dir: str = os.getenv("LOGDIR", "./vision-arena-logs/"),
90
+ mode="battle",
91
+ token = os.environ.get("HUGGINGFACE_TOKEN", None)
 
 
 
 
92
  ):
 
 
 
 
93
  with open(data_file, "r") as f:
94
  data = json.load(f)
95
 
 
 
 
96
 
97
 
98
+ has_image_stats = {
99
+ "has_image": 0,
100
+ "no_image": 0,
 
 
101
  }
102
+ if mode == "keep_bad_only":
103
+ # anony only
104
+ data = [d for d in data if d["anony"]]
105
+
106
+ new_data = []
107
+ for battle in data:
108
+ image = load_battle_image(battle, log_dir)
109
+ if image is None:
110
+ has_image_stats["no_image"] += 1
111
+ # we don't keep the data without image
112
+ continue
113
+ has_image_stats["has_image"] += 1
114
+
115
+ if battle["winner"] in ["model_a", "model_b"]:
116
+ if battle["winner"] == "model_a":
117
+ worse_model = "model_b"
118
+ worse_conv = "conversation_b"
119
+ if battle["winner"] == "model_b":
120
+ worse_model = "model_a"
121
+ worse_conv = "conversation_a"
122
+
123
+ new_data.append({
124
+ "question_id": battle["question_id"],
125
+ "model": battle[worse_model],
126
+ "conversation": battle[worse_conv],
127
+ "language": battle["language"],
128
+ "image": image,
129
+ "turn": battle["turn"],
130
+ })
131
+ elif battle["winner"] == "tie (bothbad)":
132
+
133
+ new_data.append({
134
+ "question_id": battle["question_id"],
135
+ "model": battle["model_a"],
136
+ "conversation": battle["conversation_a"],
137
+ "language": battle["language"],
138
+ "image": image,
139
+ "turn": battle["turn"],
140
+ })
141
 
142
+ new_data.append({
143
+ "question_id": battle["question_id"],
144
+ "model": battle["model_b"],
145
+ "conversation": battle["conversation_b"],
146
+ "language": battle["language"],
147
+ "image": image,
148
+ "turn": battle["turn"],
149
+ })
150
+
151
+ split = "test"
152
+ hf_dataset = create_hf_dataset(new_data, "test")
153
+
154
+ elif mode == "battle":
155
+ new_data = []
156
  for battle in data:
157
+ image = load_battle_image(battle, log_dir)
158
+ if image is None:
159
+ has_image_stats["no_image"] += 1
 
 
 
 
 
 
 
 
 
 
160
  continue
161
+ has_image_stats["has_image"] += 1
162
  new_data.append({
163
+ "question_id": battle["question_id"],
164
+ "model_a": battle["model_a"],
165
+ "model_b": battle["model_b"],
166
+ "conversation_a": battle["conversation_a"],
167
+ "conversation_b": battle["conversation_b"],
168
+ "language": battle["language"],
169
+ "image": image,
170
+ "turn": battle["turn"],
171
+ "anony": battle["anony"],
 
 
172
  })
173
  split = "test"
174
+ hf_dataset = create_hf_battle_dataset(new_data, "test")
175
  else:
176
+ raise ValueError(f"Invalid mode: {mode}")
177
 
178
+ print(f"Stats: {has_image_stats}")
179
  print(hf_dataset)
180
  print(f"Uploading to part {repo_id}:{split}...")
181
  hf_dataset.push_to_hub(
182
  repo_id=repo_id,
183
+ config_name=mode,
184
  split=split,
185
  token=token,
186
  commit_message=f"Add vision-arena {split} dataset",
arena_elo/elo_rating/utils.py CHANGED
@@ -3,20 +3,12 @@ import pytz
3
  import PIL
4
  import os
5
 
6
- import sys
7
- sys.path.append('../')
8
- from model.model_registry import get_model_info
9
-
10
  def detect_language(text: str) -> str:
11
  """Detect the langauge of a string."""
12
- try:
13
- import polyglot # pip3 install polyglot pyicu pycld2
14
- from polyglot.detect import Detector
15
- from polyglot.detect.base import logger as polyglot_logger
16
- import pycld2
17
- except ImportError as e:
18
- print("Please install the required libraries: polyglot, pycld2: pip3 install polyglot pyicu pycld2")
19
- exit(1)
20
 
21
  polyglot_logger.setLevel("ERROR")
22
 
 
3
  import PIL
4
  import os
5
 
 
 
 
 
6
  def detect_language(text: str) -> str:
7
  """Detect the langauge of a string."""
8
+ import polyglot # pip3 install polyglot pyicu pycld2
9
+ from polyglot.detect import Detector
10
+ from polyglot.detect.base import logger as polyglot_logger
11
+ import pycld2
 
 
 
 
12
 
13
  polyglot_logger.setLevel("ERROR")
14
 
arena_elo/generation_model_info.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "LCM": {
3
+ "Link": "https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7",
4
+ "License": "MIT License",
5
+ "Organization": "Tsinghua University"
6
+ },
7
+ "Playground v2": {
8
+ "Link": "https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic",
9
+ "License": "Playground v2 Community License",
10
+ "Organization": "Playground"
11
+ },
12
+ "OpenJourney": {
13
+ "Link": "https://huggingface.co/prompthero/openjourney",
14
+ "License": "creativeml-openrail-m",
15
+ "Organization": "PromptHero"
16
+ },
17
+ "SDXLTurbo": {
18
+ "Link": "https://huggingface.co/stabilityai/sdxl-turbo",
19
+ "License": "sai-nc-community (other)",
20
+ "Organization": "Stability AI"
21
+ },
22
+ "SDXL": {
23
+ "Link": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0",
24
+ "License": "openrail++",
25
+ "Organization": "Stability AI"
26
+ },
27
+ "PixArtAlpha": {
28
+ "Link": "https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS",
29
+ "License": "openrail++",
30
+ "Organization": "PixArt-alpha"
31
+ },
32
+ "SDXLLightning": {
33
+ "Link": "https://huggingface.co/ByteDance/SDXL-Lightning",
34
+ "License": "openrail++",
35
+ "Organization": "ByteDance"
36
+ },
37
+ "StableCascade": {
38
+ "Link": "https://huggingface.co/stabilityai/stable-cascade",
39
+ "License": "stable-cascade-nc-community (other)",
40
+ "Organization": "Stability AI"
41
+ }
42
+ }
arena_elo/results/20240315/elo_results_image_editing.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5cef00c45d392a30913b367825270fcee5fd29e5c830866eef3d07146b3502f3
3
- size 57091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e528d30840c8a5787b0d2f08f27758b02f7eb718ccab695010b30df2127efe5e
3
+ size 57064
arena_elo/results/20240327/clean_battle_t2i_generation.json CHANGED
The diff for this file is too large to render. See raw diff
 
arena_elo/results/20240327/elo_results_t2i_generation.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f525abe69feb822d341929b27ef7660ddd5e6ff0491bed8383a8e3d19f0342bd
3
- size 62414
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fec01fe5af62dce3990634cffd1d926330ccbf170ef0c3b5d2f07fb06c4cf149
3
+ size 65189
arena_elo/results/20240327/t2i_generation_leaderboard.csv CHANGED
@@ -1,10 +1,11 @@
1
  key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
2
- Playground v2.5,Playground v2.5,1226.2872445351936,1246.1685934024742,Playground v2.5 Community License,Playground,https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic
3
- StableCascade,StableCascade,1105.3322734027522,1087.9198960927265,stable-cascade-nc-community (other),Stability AI,https://huggingface.co/stabilityai/stable-cascade
4
- Playground v2,Playground v2,1091.4371447234744,1090.676108819673,Playground v2 Community License,Playground,https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic
5
- SDXLLightning,SDXLLightning,1043.235902888147,1045.0529259890538,openrail++,ByteDance,https://huggingface.co/ByteDance/SDXL-Lightning
6
- PixArtAlpha,PixArtAlpha,1020.6412075829058,1006.9966036187151,openrail++,PixArt-alpha,https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS
7
- SDXL,SDXL,964.7626495363717,969.5241392802999,openrail++,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
8
- SDXLTurbo,SDXLTurbo,912.2113859675355,914.3805456579931,sai-nc-community (other),Stability AI,https://huggingface.co/stabilityai/sdxl-turbo
9
- OpenJourney,OpenJourney,841.2224045541894,832.2282703082603,creativeml-openrail-m,PromptHero,https://huggingface.co/prompthero/openjourney
10
- LCM,LCM,794.8697868094328,810.2118373597045,MIT License,Tsinghua University,https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7
 
 
1
  key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
2
+ Playground v2.5,Playground v2.5,1212.4660228554317,1233.021110469063,N/A,N/A,N/A
3
+ StableCascade,StableCascade,1098.8180832734447,1081.4707812969855,stable-cascade-nc-community (other),Stability AI,https://huggingface.co/stabilityai/stable-cascade
4
+ PlayGroundV2,PlayGroundV2,1089.993871580802,1088.6262085724481,N/A,N/A,N/A
5
+ Playground v2,Playground v2,1049.6156124554975,1051.618375116693,Playground v2 Community License,Playground,https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic
6
+ SDXLLightning,SDXLLightning,1036.8582186059539,1039.3079223370821,openrail++,ByteDance,https://huggingface.co/ByteDance/SDXL-Lightning
7
+ PixArtAlpha,PixArtAlpha,1016.2085497703334,1002.5100184720693,openrail++,PixArt-alpha,https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS
8
+ SDXL,SDXL,960.5073412035289,965.3037978455568,openrail++,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
9
+ SDXLTurbo,SDXLTurbo,907.997473382927,910.1644152252661,sai-nc-community (other),Stability AI,https://huggingface.co/stabilityai/sdxl-turbo
10
+ OpenJourney,OpenJourney,836.9689192463355,827.9470053715127,creativeml-openrail-m,PromptHero,https://huggingface.co/prompthero/openjourney
11
+ LCM,LCM,790.5659076257482,805.8155782210948,MIT License,Tsinghua University,https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7
arena_elo/results/20240328/clean_battle_image_editing.json DELETED
@@ -1,890 +0,0 @@
1
- [
2
- {
3
- "model_a": "CycleDiffusion",
4
- "model_b": "InstructPix2Pix",
5
- "winner": "model_b",
6
- "judge": "arena_user_::1",
7
- "anony": true,
8
- "tstamp": 1707712630.872
9
- },
10
- {
11
- "model_a": "CycleDiffusion",
12
- "model_b": "InstructPix2Pix",
13
- "winner": "model_b",
14
- "judge": "arena_user_::1",
15
- "anony": false,
16
- "tstamp": 1707712699.668
17
- },
18
- {
19
- "model_a": "Pix2PixZero",
20
- "model_b": "MagicBrush",
21
- "winner": "model_a",
22
- "judge": "arena_user_::1",
23
- "anony": true,
24
- "tstamp": 1707712896.0427
25
- },
26
- {
27
- "model_a": "CycleDiffusion",
28
- "model_b": "InstructPix2Pix",
29
- "winner": "model_b",
30
- "judge": "arena_user_::1",
31
- "anony": false,
32
- "tstamp": 1707712929.7061
33
- },
34
- {
35
- "model_a": "CycleDiffusion",
36
- "model_b": "InstructPix2Pix",
37
- "winner": "model_b",
38
- "judge": "arena_user_::1",
39
- "anony": true,
40
- "tstamp": 1707713147.0445
41
- },
42
- {
43
- "model_a": "CycleDiffusion",
44
- "model_b": "PNP",
45
- "winner": "model_b",
46
- "judge": "arena_user_::1",
47
- "anony": true,
48
- "tstamp": 1707713198.9284
49
- },
50
- {
51
- "model_a": "CycleDiffusion",
52
- "model_b": "Prompt2prompt",
53
- "winner": "model_b",
54
- "judge": "arena_user_::1",
55
- "anony": true,
56
- "tstamp": 1707713210.1306
57
- },
58
- {
59
- "model_a": "Prompt2prompt",
60
- "model_b": "SDEdit",
61
- "winner": "model_a",
62
- "judge": "arena_user_::1",
63
- "anony": true,
64
- "tstamp": 1707713747.5115
65
- },
66
- {
67
- "model_a": "PNP",
68
- "model_b": "Pix2PixZero",
69
- "winner": "model_a",
70
- "judge": "arena_user_::1",
71
- "anony": true,
72
- "tstamp": 1707715613.7226
73
- },
74
- {
75
- "model_a": "CycleDiffusion",
76
- "model_b": "MagicBrush",
77
- "winner": "model_b",
78
- "judge": "arena_user_::1",
79
- "anony": true,
80
- "tstamp": 1707765708.2644
81
- },
82
- {
83
- "model_a": "PNP",
84
- "model_b": "CycleDiffusion",
85
- "winner": "model_a",
86
- "judge": "arena_user_::1",
87
- "anony": true,
88
- "tstamp": 1707765861.2742
89
- },
90
- {
91
- "model_a": "PNP",
92
- "model_b": "CycleDiffusion",
93
- "winner": "model_a",
94
- "judge": "arena_user_::1",
95
- "anony": false,
96
- "tstamp": 1707765975.0206
97
- },
98
- {
99
- "model_a": "PNP",
100
- "model_b": "CycleDiffusion",
101
- "winner": "model_a",
102
- "judge": "arena_user_::1",
103
- "anony": true,
104
- "tstamp": 1707768866.9065
105
- },
106
- {
107
- "model_a": "SDEdit",
108
- "model_b": "MagicBrush",
109
- "winner": "model_b",
110
- "judge": "arena_user_::1",
111
- "anony": true,
112
- "tstamp": 1707771673.2989
113
- },
114
- {
115
- "model_a": "SDEdit",
116
- "model_b": "MagicBrush",
117
- "winner": "model_b",
118
- "judge": "arena_user_::1",
119
- "anony": true,
120
- "tstamp": 1707784377.6617
121
- },
122
- {
123
- "model_a": "SDEdit",
124
- "model_b": "MagicBrush",
125
- "winner": "model_b",
126
- "judge": "arena_user_::1",
127
- "anony": true,
128
- "tstamp": 1707784466.8915
129
- },
130
- {
131
- "model_a": "CycleDiffusion",
132
- "model_b": "PNP",
133
- "winner": "model_b",
134
- "judge": "arena_user_::1",
135
- "anony": true,
136
- "tstamp": 1707784983.9581
137
- },
138
- {
139
- "model_a": "MagicBrush",
140
- "model_b": "SDEdit",
141
- "winner": "model_a",
142
- "judge": "arena_user_::1",
143
- "anony": true,
144
- "tstamp": 1707785277.16
145
- },
146
- {
147
- "model_a": "MagicBrush",
148
- "model_b": "SDEdit",
149
- "winner": "model_a",
150
- "judge": "arena_user_::1",
151
- "anony": true,
152
- "tstamp": 1707795299.0619
153
- },
154
- {
155
- "model_a": "MagicBrush",
156
- "model_b": "SDEdit",
157
- "winner": "tie (bothbad)",
158
- "judge": "arena_user_::1",
159
- "anony": true,
160
- "tstamp": 1707795798.752
161
- },
162
- {
163
- "model_a": "SDEdit",
164
- "model_b": "Prompt2prompt",
165
- "winner": "model_b",
166
- "judge": "arena_user_::1",
167
- "anony": false,
168
- "tstamp": 1707796435.7996
169
- },
170
- {
171
- "model_a": "SDEdit",
172
- "model_b": "CycleDiffusion",
173
- "winner": "model_b",
174
- "judge": "arena_user_::1",
175
- "anony": false,
176
- "tstamp": 1707797278.7369
177
- },
178
- {
179
- "model_a": "SDEdit",
180
- "model_b": "CycleDiffusion",
181
- "winner": "model_a",
182
- "judge": "arena_user_::1",
183
- "anony": false,
184
- "tstamp": 1707797279.6004
185
- },
186
- {
187
- "model_a": "SDEdit",
188
- "model_b": "Prompt2prompt",
189
- "winner": "model_b",
190
- "judge": "arena_user_::1",
191
- "anony": true,
192
- "tstamp": 1707805086.9739
193
- },
194
- {
195
- "model_a": "PNP",
196
- "model_b": "SDEdit",
197
- "winner": "model_a",
198
- "judge": "arena_user_::1",
199
- "anony": true,
200
- "tstamp": 1707805220.3253
201
- },
202
- {
203
- "model_a": "InstructPix2Pix",
204
- "model_b": "CycleDiffusion",
205
- "winner": "tie (bothbad)",
206
- "judge": "arena_user_::1",
207
- "anony": true,
208
- "tstamp": 1707805332.6322
209
- },
210
- {
211
- "model_a": "InstructPix2Pix",
212
- "model_b": "Prompt2prompt",
213
- "winner": "model_b",
214
- "judge": "arena_user_::1",
215
- "anony": true,
216
- "tstamp": 1707805476.0509
217
- },
218
- {
219
- "model_a": "InstructPix2Pix",
220
- "model_b": "Prompt2prompt",
221
- "winner": "model_b",
222
- "judge": "arena_user_::1",
223
- "anony": true,
224
- "tstamp": 1707818374.3438
225
- },
226
- {
227
- "model_a": "PNP",
228
- "model_b": "Prompt2prompt",
229
- "winner": "model_b",
230
- "judge": "arena_user_::1",
231
- "anony": true,
232
- "tstamp": 1707834631.9088
233
- },
234
- {
235
- "model_a": "InstructPix2Pix",
236
- "model_b": "SDEdit",
237
- "winner": "model_a",
238
- "judge": "arena_user_::1",
239
- "anony": true,
240
- "tstamp": 1707834954.0147
241
- },
242
- {
243
- "model_a": "Prompt2prompt",
244
- "model_b": "Pix2PixZero",
245
- "winner": "tie (bothbad)",
246
- "judge": "arena_user_::1",
247
- "anony": true,
248
- "tstamp": 1707835366.544
249
- },
250
- {
251
- "model_a": "PNP",
252
- "model_b": "SDEdit",
253
- "winner": "model_a",
254
- "judge": "arena_user_::1",
255
- "anony": true,
256
- "tstamp": 1707835643.6178
257
- },
258
- {
259
- "model_a": "MagicBrush",
260
- "model_b": "InstructPix2Pix",
261
- "winner": "tie (bothbad)",
262
- "judge": "arena_user_::1",
263
- "anony": true,
264
- "tstamp": 1707835789.25
265
- },
266
- {
267
- "model_a": "MagicBrush",
268
- "model_b": "PNP",
269
- "winner": "tie (bothbad)",
270
- "judge": "arena_user_::1",
271
- "anony": true,
272
- "tstamp": 1707836852.671
273
- },
274
- {
275
- "model_a": "MagicBrush",
276
- "model_b": "InstructPix2Pix",
277
- "winner": "model_a",
278
- "judge": "arena_user_::1",
279
- "anony": false,
280
- "tstamp": 1707836952.6082
281
- },
282
- {
283
- "model_a": "CycleDiffusion",
284
- "model_b": "SDEdit",
285
- "winner": "tie (bothbad)",
286
- "judge": "arena_user_::1",
287
- "anony": false,
288
- "tstamp": 1707837020.7148
289
- },
290
- {
291
- "model_a": "InstructPix2Pix",
292
- "model_b": "PNP",
293
- "winner": "model_a",
294
- "judge": "arena_user_::1",
295
- "anony": true,
296
- "tstamp": 1707837226.2259
297
- },
298
- {
299
- "model_a": "Prompt2prompt",
300
- "model_b": "Pix2PixZero",
301
- "winner": "model_a",
302
- "judge": "arena_user_::1",
303
- "anony": true,
304
- "tstamp": 1707838166.1449
305
- },
306
- {
307
- "model_a": "InstructPix2Pix",
308
- "model_b": "MagicBrush",
309
- "winner": "tie (bothbad)",
310
- "judge": "arena_user_::1",
311
- "anony": true,
312
- "tstamp": 1707838405.0013
313
- },
314
- {
315
- "model_a": "MagicBrush",
316
- "model_b": "CycleDiffusion",
317
- "winner": "model_a",
318
- "judge": "arena_user_::1",
319
- "anony": true,
320
- "tstamp": 1707839133.3126
321
- },
322
- {
323
- "model_a": "Prompt2prompt",
324
- "model_b": "InstructPix2Pix",
325
- "winner": "model_a",
326
- "judge": "arena_user_::1",
327
- "anony": true,
328
- "tstamp": 1707839484.6824
329
- },
330
- {
331
- "model_a": "PNP",
332
- "model_b": "InstructPix2Pix",
333
- "winner": "tie (bothbad)",
334
- "judge": "arena_user_::1",
335
- "anony": true,
336
- "tstamp": 1707850104.2499
337
- },
338
- {
339
- "model_a": "InstructPix2Pix",
340
- "model_b": "Pix2PixZero",
341
- "winner": "model_a",
342
- "judge": "arena_user_::1",
343
- "anony": true,
344
- "tstamp": 1707851384.7689
345
- },
346
- {
347
- "model_a": "PNP",
348
- "model_b": "MagicBrush",
349
- "winner": "model_b",
350
- "judge": "arena_user_::1",
351
- "anony": true,
352
- "tstamp": 1707851936.9466
353
- },
354
- {
355
- "model_a": "CycleDiffusion",
356
- "model_b": "MagicBrush",
357
- "winner": "tie (bothbad)",
358
- "judge": "arena_user_::1",
359
- "anony": true,
360
- "tstamp": 1707852836.3291
361
- },
362
- {
363
- "model_a": "CycleDiffusion",
364
- "model_b": "MagicBrush",
365
- "winner": "tie (bothbad)",
366
- "judge": "arena_user_::1",
367
- "anony": false,
368
- "tstamp": 1707852878.673
369
- },
370
- {
371
- "model_a": "Prompt2prompt",
372
- "model_b": "InstructPix2Pix",
373
- "winner": "model_a",
374
- "judge": "arena_user_::1",
375
- "anony": true,
376
- "tstamp": 1707853008.1359
377
- },
378
- {
379
- "model_a": "InstructPix2Pix",
380
- "model_b": "Pix2PixZero",
381
- "winner": "model_a",
382
- "judge": "arena_user_::1",
383
- "anony": false,
384
- "tstamp": 1707856807.6229
385
- },
386
- {
387
- "model_a": "MagicBrush",
388
- "model_b": "Pix2PixZero",
389
- "winner": "tie (bothbad)",
390
- "judge": "arena_user_::1",
391
- "anony": false,
392
- "tstamp": 1707863740.3507
393
- },
394
- {
395
- "model_a": "MagicBrush",
396
- "model_b": "PNP",
397
- "winner": "model_b",
398
- "judge": "arena_user_::1",
399
- "anony": true,
400
- "tstamp": 1707866312.1118
401
- },
402
- {
403
- "model_a": "Pix2PixZero",
404
- "model_b": "Prompt2prompt",
405
- "winner": "model_b",
406
- "judge": "arena_user_::1",
407
- "anony": true,
408
- "tstamp": 1707883083.3533
409
- },
410
- {
411
- "model_a": "Pix2PixZero",
412
- "model_b": "InstructPix2Pix",
413
- "winner": "model_b",
414
- "judge": "arena_user_::1",
415
- "anony": true,
416
- "tstamp": 1707883181.1397
417
- },
418
- {
419
- "model_a": "Pix2PixZero",
420
- "model_b": "Prompt2prompt",
421
- "winner": "model_b",
422
- "judge": "arena_user_::1",
423
- "anony": true,
424
- "tstamp": 1707883187.9173
425
- },
426
- {
427
- "model_a": "PNP",
428
- "model_b": "Prompt2prompt",
429
- "winner": "model_a",
430
- "judge": "arena_user_::1",
431
- "anony": true,
432
- "tstamp": 1707883507.587
433
- },
434
- {
435
- "model_a": "Prompt2prompt",
436
- "model_b": "CycleDiffusion",
437
- "winner": "model_a",
438
- "judge": "arena_user_::1",
439
- "anony": true,
440
- "tstamp": 1707883939.6125
441
- },
442
- {
443
- "model_a": "Prompt2prompt",
444
- "model_b": "MagicBrush",
445
- "winner": "model_b",
446
- "judge": "arena_user_::1",
447
- "anony": true,
448
- "tstamp": 1707892689.4407
449
- },
450
- {
451
- "model_a": "MagicBrush",
452
- "model_b": "InstructPix2Pix",
453
- "winner": "model_b",
454
- "judge": "arena_user_::1",
455
- "anony": true,
456
- "tstamp": 1707908988.749
457
- },
458
- {
459
- "model_a": "Prompt2prompt",
460
- "model_b": "InstructPix2Pix",
461
- "winner": "model_a",
462
- "judge": "arena_user_::1",
463
- "anony": true,
464
- "tstamp": 1707912639.2701
465
- },
466
- {
467
- "model_a": "MagicBrush",
468
- "model_b": "Pix2PixZero",
469
- "winner": "model_a",
470
- "judge": "arena_user_::1",
471
- "anony": false,
472
- "tstamp": 1707917685.9574
473
- },
474
- {
475
- "model_a": "MagicBrush",
476
- "model_b": "InstructPix2Pix",
477
- "winner": "tie (bothbad)",
478
- "judge": "arena_user_::1",
479
- "anony": false,
480
- "tstamp": 1707919429.336
481
- },
482
- {
483
- "model_a": "InstructPix2Pix",
484
- "model_b": "CycleDiffusion",
485
- "winner": "model_a",
486
- "judge": "arena_user_::1",
487
- "anony": true,
488
- "tstamp": 1707932651.9192
489
- },
490
- {
491
- "model_a": "MagicBrush",
492
- "model_b": "InstructPix2Pix",
493
- "winner": "model_a",
494
- "judge": "arena_user_::1",
495
- "anony": true,
496
- "tstamp": 1707932749.3107
497
- },
498
- {
499
- "model_a": "Prompt2prompt",
500
- "model_b": "PNP",
501
- "winner": "model_a",
502
- "judge": "arena_user_::1",
503
- "anony": true,
504
- "tstamp": 1707933208.5797
505
- },
506
- {
507
- "model_a": "MagicBrush",
508
- "model_b": "Pix2PixZero",
509
- "winner": "model_a",
510
- "judge": "arena_user_::1",
511
- "anony": false,
512
- "tstamp": 1707945335.6341
513
- },
514
- {
515
- "model_a": "MagicBrush",
516
- "model_b": "PNP",
517
- "winner": "model_a",
518
- "judge": "arena_user_::1",
519
- "anony": false,
520
- "tstamp": 1708031168.6838
521
- },
522
- {
523
- "model_a": "Pix2PixZero",
524
- "model_b": "PNP",
525
- "winner": "model_b",
526
- "judge": "arena_user_::1",
527
- "anony": false,
528
- "tstamp": 1708038931.5388
529
- },
530
- {
531
- "model_a": "Pix2PixZero",
532
- "model_b": "CycleDiffusion",
533
- "winner": "tie (bothbad)",
534
- "judge": "arena_user_::1",
535
- "anony": true,
536
- "tstamp": 1708057382.78
537
- },
538
- {
539
- "model_a": "PNP",
540
- "model_b": "InstructPix2Pix",
541
- "winner": "model_b",
542
- "judge": "arena_user_::1",
543
- "anony": true,
544
- "tstamp": 1708093689.8237
545
- },
546
- {
547
- "model_a": "MagicBrush",
548
- "model_b": "PNP",
549
- "winner": "model_b",
550
- "judge": "arena_user_::1",
551
- "anony": true,
552
- "tstamp": 1708093910.4683
553
- },
554
- {
555
- "model_a": "Pix2PixZero",
556
- "model_b": "Prompt2prompt",
557
- "winner": "model_b",
558
- "judge": "arena_user_::1",
559
- "anony": false,
560
- "tstamp": 1708095090.8232
561
- },
562
- {
563
- "model_a": "Pix2PixZero",
564
- "model_b": "Prompt2prompt",
565
- "winner": "model_a",
566
- "judge": "arena_user_::1",
567
- "anony": false,
568
- "tstamp": 1708095305.4665
569
- },
570
- {
571
- "model_a": "InstructPix2Pix",
572
- "model_b": "Prompt2prompt",
573
- "winner": "model_b",
574
- "judge": "arena_user_::1",
575
- "anony": true,
576
- "tstamp": 1708140553.1694
577
- },
578
- {
579
- "model_a": "MagicBrush",
580
- "model_b": "Prompt2prompt",
581
- "winner": "model_a",
582
- "judge": "arena_user_::1",
583
- "anony": true,
584
- "tstamp": 1708145512.3656
585
- },
586
- {
587
- "model_a": "Pix2PixZero",
588
- "model_b": "Prompt2prompt",
589
- "winner": "tie (bothbad)",
590
- "judge": "arena_user_::1",
591
- "anony": true,
592
- "tstamp": 1708145724.4127
593
- },
594
- {
595
- "model_a": "Pix2PixZero",
596
- "model_b": "PNP",
597
- "winner": "model_b",
598
- "judge": "arena_user_::1",
599
- "anony": true,
600
- "tstamp": 1708146846.5098
601
- },
602
- {
603
- "model_a": "PNP",
604
- "model_b": "MagicBrush",
605
- "winner": "model_a",
606
- "judge": "arena_user_::1",
607
- "anony": true,
608
- "tstamp": 1708189738.4864
609
- },
610
- {
611
- "model_a": "Prompt2prompt",
612
- "model_b": "InstructPix2Pix",
613
- "winner": "model_b",
614
- "judge": "arena_user_::1",
615
- "anony": true,
616
- "tstamp": 1708235874.9246
617
- },
618
- {
619
- "model_a": "Pix2PixZero",
620
- "model_b": "PNP",
621
- "winner": "model_b",
622
- "judge": "arena_user_::1",
623
- "anony": false,
624
- "tstamp": 1708257619.7115
625
- },
626
- {
627
- "model_a": "MagicBrush",
628
- "model_b": "Pix2PixZero",
629
- "winner": "tie (bothbad)",
630
- "judge": "arena_user_::1",
631
- "anony": true,
632
- "tstamp": 1708341265.7655
633
- },
634
- {
635
- "model_a": "MagicBrush",
636
- "model_b": "InstructPix2Pix",
637
- "winner": "model_b",
638
- "judge": "arena_user_::1",
639
- "anony": true,
640
- "tstamp": 1708350183.3086
641
- },
642
- {
643
- "model_a": "MagicBrush",
644
- "model_b": "Pix2PixZero",
645
- "winner": "tie (bothbad)",
646
- "judge": "arena_user_::1",
647
- "anony": true,
648
- "tstamp": 1708399707.1681
649
- },
650
- {
651
- "model_a": "PNP",
652
- "model_b": "MagicBrush",
653
- "winner": "model_a",
654
- "judge": "arena_user_::1",
655
- "anony": true,
656
- "tstamp": 1708441502.4707
657
- },
658
- {
659
- "model_a": "InstructPix2Pix",
660
- "model_b": "MagicBrush",
661
- "winner": "model_a",
662
- "judge": "arena_user_::1",
663
- "anony": true,
664
- "tstamp": 1708441716.8195
665
- },
666
- {
667
- "model_a": "InstructPix2Pix",
668
- "model_b": "MagicBrush",
669
- "winner": "model_b",
670
- "judge": "arena_user_::1",
671
- "anony": false,
672
- "tstamp": 1708546759.2009
673
- },
674
- {
675
- "model_a": "InstructPix2Pix",
676
- "model_b": "MagicBrush",
677
- "winner": "model_a",
678
- "judge": "arena_user_::1",
679
- "anony": false,
680
- "tstamp": 1708546805.4892
681
- },
682
- {
683
- "model_a": "Pix2PixZero",
684
- "model_b": "CycleDiffusion",
685
- "winner": "tie (bothbad)",
686
- "judge": "arena_user_::1",
687
- "anony": true,
688
- "tstamp": 1708547082.7124
689
- },
690
- {
691
- "model_a": "InstructPix2Pix",
692
- "model_b": "MagicBrush",
693
- "winner": "model_b",
694
- "judge": "arena_user_::1",
695
- "anony": false,
696
- "tstamp": 1708547166.9685
697
- },
698
- {
699
- "model_a": "InstructPix2Pix",
700
- "model_b": "MagicBrush",
701
- "winner": "model_b",
702
- "judge": "arena_user_::1",
703
- "anony": false,
704
- "tstamp": 1708547293.7107
705
- },
706
- {
707
- "model_a": "CycleDiffusion",
708
- "model_b": "PNP",
709
- "winner": "tie (bothbad)",
710
- "judge": "arena_user_::1",
711
- "anony": true,
712
- "tstamp": 1708575046.0529
713
- },
714
- {
715
- "model_a": "CycleDiffusion",
716
- "model_b": "MagicBrush",
717
- "winner": "tie (bothbad)",
718
- "judge": "arena_user_::1",
719
- "anony": true,
720
- "tstamp": 1708615466.9264
721
- },
722
- {
723
- "model_a": "CycleDiffusion",
724
- "model_b": "MagicBrush",
725
- "winner": "model_b",
726
- "judge": "arena_user_::1",
727
- "anony": false,
728
- "tstamp": 1708615516.3341
729
- },
730
- {
731
- "model_a": "InstructPix2Pix",
732
- "model_b": "PNP",
733
- "winner": "model_b",
734
- "judge": "arena_user_::1",
735
- "anony": false,
736
- "tstamp": 1709205399.0098
737
- },
738
- {
739
- "model_a": "InstructPix2Pix",
740
- "model_b": "PNP",
741
- "winner": "model_b",
742
- "judge": "arena_user_::1",
743
- "anony": false,
744
- "tstamp": 1709205767.8923
745
- },
746
- {
747
- "model_a": "PNP",
748
- "model_b": "InstructPix2Pix",
749
- "winner": "model_b",
750
- "judge": "arena_user_::1",
751
- "anony": true,
752
- "tstamp": 1709443700.05
753
- },
754
- {
755
- "model_a": "MagicBrush",
756
- "model_b": "Pix2PixZero",
757
- "winner": "model_a",
758
- "judge": "arena_user_::1",
759
- "anony": true,
760
- "tstamp": 1709702898.9291
761
- },
762
- {
763
- "model_a": "CycleDiffusion",
764
- "model_b": "Prompt2prompt",
765
- "winner": "tie (bothbad)",
766
- "judge": "arena_user_::1",
767
- "anony": true,
768
- "tstamp": 1710091925.1861
769
- },
770
- {
771
- "model_a": "MagicBrush",
772
- "model_b": "InstructPix2Pix",
773
- "winner": "tie (bothbad)",
774
- "judge": "arena_user_::1",
775
- "anony": true,
776
- "tstamp": 1710517781.1525
777
- },
778
- {
779
- "model_a": "MagicBrush",
780
- "model_b": "InstructPix2Pix",
781
- "winner": "tie (bothbad)",
782
- "judge": "arena_user_::1",
783
- "anony": false,
784
- "tstamp": 1710517859.2942
785
- },
786
- {
787
- "model_a": "Pix2PixZero",
788
- "model_b": "CycleDiffusion",
789
- "winner": "tie (bothbad)",
790
- "judge": "arena_user_::1",
791
- "anony": true,
792
- "tstamp": 1710535672.9791
793
- },
794
- {
795
- "model_a": "CycleDiffusion",
796
- "model_b": "Pix2PixZero",
797
- "winner": "model_b",
798
- "judge": "arena_user_10.16.25.191",
799
- "anony": false,
800
- "tstamp": 1711610477.1213
801
- },
802
- {
803
- "model_a": "CycleDiffusion",
804
- "model_b": "Pix2PixZero",
805
- "winner": "model_b",
806
- "judge": "arena_user_10.16.7.189",
807
- "anony": false,
808
- "tstamp": 1711629129.3894
809
- },
810
- {
811
- "model_a": "InstructPix2Pix",
812
- "model_b": "CycleDiffusion",
813
- "winner": "model_b",
814
- "judge": "arena_user_10.16.7.189",
815
- "anony": false,
816
- "tstamp": 1711629705.2246
817
- },
818
- {
819
- "model_a": "CycleDiffusion",
820
- "model_b": "Pix2PixZero",
821
- "winner": "model_b",
822
- "judge": "arena_user_10.16.25.191",
823
- "anony": false,
824
- "tstamp": 1711630362.5575
825
- },
826
- {
827
- "model_a": "MagicBrush",
828
- "model_b": "SDEdit",
829
- "winner": "model_a",
830
- "judge": "arena_user_127.0.0.1",
831
- "anony": false,
832
- "tstamp": 1711631112.5207
833
- },
834
- {
835
- "model_a": "Pix2PixZero",
836
- "model_b": "Prompt2prompt",
837
- "winner": "model_a",
838
- "judge": "arena_user_10.16.41.118",
839
- "anony": false,
840
- "tstamp": 1711631690.5127
841
- },
842
- {
843
- "model_a": "MagicBrush",
844
- "model_b": "InstructPix2Pix",
845
- "winner": "model_a",
846
- "judge": "arena_user_127.0.0.1",
847
- "anony": false,
848
- "tstamp": 1711633200.2923
849
- },
850
- {
851
- "model_a": "MagicBrush",
852
- "model_b": "InstructPix2Pix",
853
- "winner": "model_a",
854
- "judge": "arena_user_127.0.0.1",
855
- "anony": false,
856
- "tstamp": 1711633594.9922
857
- },
858
- {
859
- "model_a": "MagicBrush",
860
- "model_b": "SDEdit",
861
- "winner": "model_a",
862
- "judge": "arena_user_10.16.7.189",
863
- "anony": false,
864
- "tstamp": 1711635443.3071
865
- },
866
- {
867
- "model_a": "CycleDiffusion",
868
- "model_b": "MagicBrush",
869
- "winner": "model_b",
870
- "judge": "arena_user_10.16.25.191",
871
- "anony": false,
872
- "tstamp": 1711635899.3088
873
- },
874
- {
875
- "model_a": "SDEdit",
876
- "model_b": "MagicBrush",
877
- "winner": "model_b",
878
- "judge": "arena_user_10.16.41.118",
879
- "anony": false,
880
- "tstamp": 1711639015.428
881
- },
882
- {
883
- "model_a": "InstructPix2Pix",
884
- "model_b": "MagicBrush",
885
- "winner": "model_b",
886
- "judge": "arena_user_10.16.7.189",
887
- "anony": false,
888
- "tstamp": 1711646372.1201
889
- }
890
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/results/20240328/elo_results_image_editing.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1430e6703dd6fc1e5b8ce06b11bb3a47516763a33edaf99e4c8547da5d9a8516
3
- size 57064
 
 
 
 
arena_elo/results/20240328/image_editing_leaderboard.csv DELETED
@@ -1,8 +0,0 @@
1
- key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
2
- Prompt2prompt,Prompt2prompt,1227.5508595026165,1158.5510681980204,Apache-2.0,"Google, Tel Aviv University",https://prompt-to-prompt.github.io
3
- InstructPix2Pix,InstructPix2Pix,1160.2057367236093,1071.0628993075604,"Copyright 2023 Timothy Brooks, Aleksander Holynski, Alexei A. Efros","University of California, Berkeley",https://www.timothybrooks.com/instruct-pix2pix
4
- PNP,PNP,1142.693603173293,1165.4957550490212,-,Weizmann Institute of Science,https://github.com/MichalGeyer/plug-and-play
5
- MagicBrush,MagicBrush,1053.1728944865915,1130.5422054860635,CC-BY-4.0,"The Ohio State University, University of Waterloo",https://osu-nlp-group.github.io/MagicBrush
6
- Pix2PixZero,Pix2PixZero,918.6047552604578,960.3217617445996,MIT License,"Carnegie Mellon University, Adobe Research",https://pix2pixzero.github.io
7
- CycleDiffusion,CycleDiffusion,865.0529105743963,813.4794423328381,X11,Carnegie Mellon University,https://github.com/ChenWu98/cycle-diffusion
8
- SDEdit,SDEdit,632.7192402790356,700.546867881897,MIT License,Stanford University,https://sde-image-editing.github.io
 
 
 
 
 
 
 
 
 
arena_elo/results/20240330/elo_results_t2i_generation.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e963f9d4b66d29c2f05a3923eff56cebd1f09b07223ac069456e08dc6143cda8
3
- size 66894
 
 
 
 
arena_elo/results/20240330/t2i_generation_leaderboard.csv DELETED
@@ -1,10 +0,0 @@
1
- key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
2
- Playground v2.5,Playground v2.5,1226.2872445351936,1236.5076527218755,Playground v2.5 Community License,Playground,https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic
3
- StableCascade,StableCascade,1105.3322734027522,1062.0980902577003,stable-cascade-nc-community (other),Stability AI,https://huggingface.co/stabilityai/stable-cascade
4
- Playground v2,Playground v2,1091.4371447234744,1087.3576445526567,Playground v2 Community License,Playground,https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic
5
- SDXLLightning,SDXLLightning,1043.235902888147,1019.4526672266176,openrail++,ByteDance,https://huggingface.co/ByteDance/SDXL-Lightning
6
- PixArtAlpha,PixArtAlpha,1020.6412075829058,1001.5090282446616,openrail++,PixArt-alpha,https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS
7
- SDXL,SDXL,964.7626495363717,969.8928133531979,openrail++,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
8
- SDXLTurbo,SDXLTurbo,912.2113859675355,914.9478831930971,sai-nc-community (other),Stability AI,https://huggingface.co/stabilityai/sdxl-turbo
9
- OpenJourney,OpenJourney,841.2224045541894,835.4563491411935,creativeml-openrail-m,PromptHero,https://huggingface.co/prompthero/openjourney
10
- LCM,LCM,794.8697868094328,812.962889153237,MIT License,Tsinghua University,https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7
 
 
 
 
 
 
 
 
 
 
 
arena_elo/results/20240408/clean_battle_t2i_generation.json DELETED
The diff for this file is too large to render. See raw diff
 
arena_elo/results/20240408/elo_results_t2i_generation.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd88783d1cf752a0977152f7e16e88b54759173cbb04fb55e9392703ff4819f5
3
- size 66931
 
 
 
 
arena_elo/results/20240408/t2i_generation_leaderboard.csv DELETED
@@ -1,10 +0,0 @@
1
- key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
2
- Playground v2.5,Playground v2.5,1226.2872445351936,1233.8616648345985,Playground v2.5 Community License,Playground,https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic
3
- StableCascade,StableCascade,1105.3322734027522,1031.1844458387527,stable-cascade-nc-community (other),Stability AI,https://huggingface.co/stabilityai/stable-cascade
4
- Playground v2,Playground v2,1091.4371447234744,1093.6921447327898,Playground v2 Community License,Playground,https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic
5
- SDXLLightning,SDXLLightning,1043.235902888147,1004.2360415152086,openrail++,ByteDance,https://huggingface.co/ByteDance/SDXL-Lightning
6
- PixArtAlpha,PixArtAlpha,1020.6412075829058,999.6264863931511,openrail++,PixArt-alpha,https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS
7
- SDXL,SDXL,964.7626495363717,975.3460583905047,openrail++,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
8
- SDXLTurbo,SDXLTurbo,912.2113859675355,927.1873122981513,sai-nc-community (other),Stability AI,https://huggingface.co/stabilityai/sdxl-turbo
9
- OpenJourney,OpenJourney,841.2224045541894,848.6657236271969,creativeml-openrail-m,PromptHero,https://huggingface.co/prompthero/openjourney
10
- LCM,LCM,794.8697868094328,828.5108951096241,MIT License,Tsinghua University,https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7
 
 
 
 
 
 
 
 
 
 
 
arena_elo/results/20240411/clean_battle_image_editing.json DELETED
@@ -1,906 +0,0 @@
1
- [
2
- {
3
- "model_a": "CycleDiffusion",
4
- "model_b": "InstructPix2Pix",
5
- "winner": "model_b",
6
- "judge": "arena_user_::1",
7
- "anony": true,
8
- "tstamp": 1707712630.872
9
- },
10
- {
11
- "model_a": "CycleDiffusion",
12
- "model_b": "InstructPix2Pix",
13
- "winner": "model_b",
14
- "judge": "arena_user_::1",
15
- "anony": false,
16
- "tstamp": 1707712699.668
17
- },
18
- {
19
- "model_a": "Pix2PixZero",
20
- "model_b": "MagicBrush",
21
- "winner": "model_a",
22
- "judge": "arena_user_::1",
23
- "anony": true,
24
- "tstamp": 1707712896.0427
25
- },
26
- {
27
- "model_a": "CycleDiffusion",
28
- "model_b": "InstructPix2Pix",
29
- "winner": "model_b",
30
- "judge": "arena_user_::1",
31
- "anony": false,
32
- "tstamp": 1707712929.7061
33
- },
34
- {
35
- "model_a": "CycleDiffusion",
36
- "model_b": "InstructPix2Pix",
37
- "winner": "model_b",
38
- "judge": "arena_user_::1",
39
- "anony": true,
40
- "tstamp": 1707713147.0445
41
- },
42
- {
43
- "model_a": "CycleDiffusion",
44
- "model_b": "PNP",
45
- "winner": "model_b",
46
- "judge": "arena_user_::1",
47
- "anony": true,
48
- "tstamp": 1707713198.9284
49
- },
50
- {
51
- "model_a": "CycleDiffusion",
52
- "model_b": "Prompt2prompt",
53
- "winner": "model_b",
54
- "judge": "arena_user_::1",
55
- "anony": true,
56
- "tstamp": 1707713210.1306
57
- },
58
- {
59
- "model_a": "Prompt2prompt",
60
- "model_b": "SDEdit",
61
- "winner": "model_a",
62
- "judge": "arena_user_::1",
63
- "anony": true,
64
- "tstamp": 1707713747.5115
65
- },
66
- {
67
- "model_a": "PNP",
68
- "model_b": "Pix2PixZero",
69
- "winner": "model_a",
70
- "judge": "arena_user_::1",
71
- "anony": true,
72
- "tstamp": 1707715613.7226
73
- },
74
- {
75
- "model_a": "CycleDiffusion",
76
- "model_b": "MagicBrush",
77
- "winner": "model_b",
78
- "judge": "arena_user_::1",
79
- "anony": true,
80
- "tstamp": 1707765708.2644
81
- },
82
- {
83
- "model_a": "PNP",
84
- "model_b": "CycleDiffusion",
85
- "winner": "model_a",
86
- "judge": "arena_user_::1",
87
- "anony": true,
88
- "tstamp": 1707765861.2742
89
- },
90
- {
91
- "model_a": "PNP",
92
- "model_b": "CycleDiffusion",
93
- "winner": "model_a",
94
- "judge": "arena_user_::1",
95
- "anony": false,
96
- "tstamp": 1707765975.0206
97
- },
98
- {
99
- "model_a": "PNP",
100
- "model_b": "CycleDiffusion",
101
- "winner": "model_a",
102
- "judge": "arena_user_::1",
103
- "anony": true,
104
- "tstamp": 1707768866.9065
105
- },
106
- {
107
- "model_a": "SDEdit",
108
- "model_b": "MagicBrush",
109
- "winner": "model_b",
110
- "judge": "arena_user_::1",
111
- "anony": true,
112
- "tstamp": 1707771673.2989
113
- },
114
- {
115
- "model_a": "SDEdit",
116
- "model_b": "MagicBrush",
117
- "winner": "model_b",
118
- "judge": "arena_user_::1",
119
- "anony": true,
120
- "tstamp": 1707784377.6617
121
- },
122
- {
123
- "model_a": "SDEdit",
124
- "model_b": "MagicBrush",
125
- "winner": "model_b",
126
- "judge": "arena_user_::1",
127
- "anony": true,
128
- "tstamp": 1707784466.8915
129
- },
130
- {
131
- "model_a": "CycleDiffusion",
132
- "model_b": "PNP",
133
- "winner": "model_b",
134
- "judge": "arena_user_::1",
135
- "anony": true,
136
- "tstamp": 1707784983.9581
137
- },
138
- {
139
- "model_a": "MagicBrush",
140
- "model_b": "SDEdit",
141
- "winner": "model_a",
142
- "judge": "arena_user_::1",
143
- "anony": true,
144
- "tstamp": 1707785277.16
145
- },
146
- {
147
- "model_a": "MagicBrush",
148
- "model_b": "SDEdit",
149
- "winner": "model_a",
150
- "judge": "arena_user_::1",
151
- "anony": true,
152
- "tstamp": 1707795299.0619
153
- },
154
- {
155
- "model_a": "MagicBrush",
156
- "model_b": "SDEdit",
157
- "winner": "tie (bothbad)",
158
- "judge": "arena_user_::1",
159
- "anony": true,
160
- "tstamp": 1707795798.752
161
- },
162
- {
163
- "model_a": "SDEdit",
164
- "model_b": "Prompt2prompt",
165
- "winner": "model_b",
166
- "judge": "arena_user_::1",
167
- "anony": false,
168
- "tstamp": 1707796435.7996
169
- },
170
- {
171
- "model_a": "SDEdit",
172
- "model_b": "CycleDiffusion",
173
- "winner": "model_b",
174
- "judge": "arena_user_::1",
175
- "anony": false,
176
- "tstamp": 1707797278.7369
177
- },
178
- {
179
- "model_a": "SDEdit",
180
- "model_b": "CycleDiffusion",
181
- "winner": "model_a",
182
- "judge": "arena_user_::1",
183
- "anony": false,
184
- "tstamp": 1707797279.6004
185
- },
186
- {
187
- "model_a": "SDEdit",
188
- "model_b": "Prompt2prompt",
189
- "winner": "model_b",
190
- "judge": "arena_user_::1",
191
- "anony": true,
192
- "tstamp": 1707805086.9739
193
- },
194
- {
195
- "model_a": "PNP",
196
- "model_b": "SDEdit",
197
- "winner": "model_a",
198
- "judge": "arena_user_::1",
199
- "anony": true,
200
- "tstamp": 1707805220.3253
201
- },
202
- {
203
- "model_a": "InstructPix2Pix",
204
- "model_b": "CycleDiffusion",
205
- "winner": "tie (bothbad)",
206
- "judge": "arena_user_::1",
207
- "anony": true,
208
- "tstamp": 1707805332.6322
209
- },
210
- {
211
- "model_a": "InstructPix2Pix",
212
- "model_b": "Prompt2prompt",
213
- "winner": "model_b",
214
- "judge": "arena_user_::1",
215
- "anony": true,
216
- "tstamp": 1707805476.0509
217
- },
218
- {
219
- "model_a": "InstructPix2Pix",
220
- "model_b": "Prompt2prompt",
221
- "winner": "model_b",
222
- "judge": "arena_user_::1",
223
- "anony": true,
224
- "tstamp": 1707818374.3438
225
- },
226
- {
227
- "model_a": "PNP",
228
- "model_b": "Prompt2prompt",
229
- "winner": "model_b",
230
- "judge": "arena_user_::1",
231
- "anony": true,
232
- "tstamp": 1707834631.9088
233
- },
234
- {
235
- "model_a": "InstructPix2Pix",
236
- "model_b": "SDEdit",
237
- "winner": "model_a",
238
- "judge": "arena_user_::1",
239
- "anony": true,
240
- "tstamp": 1707834954.0147
241
- },
242
- {
243
- "model_a": "Prompt2prompt",
244
- "model_b": "Pix2PixZero",
245
- "winner": "tie (bothbad)",
246
- "judge": "arena_user_::1",
247
- "anony": true,
248
- "tstamp": 1707835366.544
249
- },
250
- {
251
- "model_a": "PNP",
252
- "model_b": "SDEdit",
253
- "winner": "model_a",
254
- "judge": "arena_user_::1",
255
- "anony": true,
256
- "tstamp": 1707835643.6178
257
- },
258
- {
259
- "model_a": "MagicBrush",
260
- "model_b": "InstructPix2Pix",
261
- "winner": "tie (bothbad)",
262
- "judge": "arena_user_::1",
263
- "anony": true,
264
- "tstamp": 1707835789.25
265
- },
266
- {
267
- "model_a": "MagicBrush",
268
- "model_b": "PNP",
269
- "winner": "tie (bothbad)",
270
- "judge": "arena_user_::1",
271
- "anony": true,
272
- "tstamp": 1707836852.671
273
- },
274
- {
275
- "model_a": "MagicBrush",
276
- "model_b": "InstructPix2Pix",
277
- "winner": "model_a",
278
- "judge": "arena_user_::1",
279
- "anony": false,
280
- "tstamp": 1707836952.6082
281
- },
282
- {
283
- "model_a": "CycleDiffusion",
284
- "model_b": "SDEdit",
285
- "winner": "tie (bothbad)",
286
- "judge": "arena_user_::1",
287
- "anony": false,
288
- "tstamp": 1707837020.7148
289
- },
290
- {
291
- "model_a": "InstructPix2Pix",
292
- "model_b": "PNP",
293
- "winner": "model_a",
294
- "judge": "arena_user_::1",
295
- "anony": true,
296
- "tstamp": 1707837226.2259
297
- },
298
- {
299
- "model_a": "Prompt2prompt",
300
- "model_b": "Pix2PixZero",
301
- "winner": "model_a",
302
- "judge": "arena_user_::1",
303
- "anony": true,
304
- "tstamp": 1707838166.1449
305
- },
306
- {
307
- "model_a": "InstructPix2Pix",
308
- "model_b": "MagicBrush",
309
- "winner": "tie (bothbad)",
310
- "judge": "arena_user_::1",
311
- "anony": true,
312
- "tstamp": 1707838405.0013
313
- },
314
- {
315
- "model_a": "MagicBrush",
316
- "model_b": "CycleDiffusion",
317
- "winner": "model_a",
318
- "judge": "arena_user_::1",
319
- "anony": true,
320
- "tstamp": 1707839133.3126
321
- },
322
- {
323
- "model_a": "Prompt2prompt",
324
- "model_b": "InstructPix2Pix",
325
- "winner": "model_a",
326
- "judge": "arena_user_::1",
327
- "anony": true,
328
- "tstamp": 1707839484.6824
329
- },
330
- {
331
- "model_a": "PNP",
332
- "model_b": "InstructPix2Pix",
333
- "winner": "tie (bothbad)",
334
- "judge": "arena_user_::1",
335
- "anony": true,
336
- "tstamp": 1707850104.2499
337
- },
338
- {
339
- "model_a": "InstructPix2Pix",
340
- "model_b": "Pix2PixZero",
341
- "winner": "model_a",
342
- "judge": "arena_user_::1",
343
- "anony": true,
344
- "tstamp": 1707851384.7689
345
- },
346
- {
347
- "model_a": "PNP",
348
- "model_b": "MagicBrush",
349
- "winner": "model_b",
350
- "judge": "arena_user_::1",
351
- "anony": true,
352
- "tstamp": 1707851936.9466
353
- },
354
- {
355
- "model_a": "CycleDiffusion",
356
- "model_b": "MagicBrush",
357
- "winner": "tie (bothbad)",
358
- "judge": "arena_user_::1",
359
- "anony": true,
360
- "tstamp": 1707852836.3291
361
- },
362
- {
363
- "model_a": "CycleDiffusion",
364
- "model_b": "MagicBrush",
365
- "winner": "tie (bothbad)",
366
- "judge": "arena_user_::1",
367
- "anony": false,
368
- "tstamp": 1707852878.673
369
- },
370
- {
371
- "model_a": "Prompt2prompt",
372
- "model_b": "InstructPix2Pix",
373
- "winner": "model_a",
374
- "judge": "arena_user_::1",
375
- "anony": true,
376
- "tstamp": 1707853008.1359
377
- },
378
- {
379
- "model_a": "InstructPix2Pix",
380
- "model_b": "Pix2PixZero",
381
- "winner": "model_a",
382
- "judge": "arena_user_::1",
383
- "anony": false,
384
- "tstamp": 1707856807.6229
385
- },
386
- {
387
- "model_a": "MagicBrush",
388
- "model_b": "Pix2PixZero",
389
- "winner": "tie (bothbad)",
390
- "judge": "arena_user_::1",
391
- "anony": false,
392
- "tstamp": 1707863740.3507
393
- },
394
- {
395
- "model_a": "MagicBrush",
396
- "model_b": "PNP",
397
- "winner": "model_b",
398
- "judge": "arena_user_::1",
399
- "anony": true,
400
- "tstamp": 1707866312.1118
401
- },
402
- {
403
- "model_a": "Pix2PixZero",
404
- "model_b": "Prompt2prompt",
405
- "winner": "model_b",
406
- "judge": "arena_user_::1",
407
- "anony": true,
408
- "tstamp": 1707883083.3533
409
- },
410
- {
411
- "model_a": "Pix2PixZero",
412
- "model_b": "InstructPix2Pix",
413
- "winner": "model_b",
414
- "judge": "arena_user_::1",
415
- "anony": true,
416
- "tstamp": 1707883181.1397
417
- },
418
- {
419
- "model_a": "Pix2PixZero",
420
- "model_b": "Prompt2prompt",
421
- "winner": "model_b",
422
- "judge": "arena_user_::1",
423
- "anony": true,
424
- "tstamp": 1707883187.9173
425
- },
426
- {
427
- "model_a": "PNP",
428
- "model_b": "Prompt2prompt",
429
- "winner": "model_a",
430
- "judge": "arena_user_::1",
431
- "anony": true,
432
- "tstamp": 1707883507.587
433
- },
434
- {
435
- "model_a": "Prompt2prompt",
436
- "model_b": "CycleDiffusion",
437
- "winner": "model_a",
438
- "judge": "arena_user_::1",
439
- "anony": true,
440
- "tstamp": 1707883939.6125
441
- },
442
- {
443
- "model_a": "Prompt2prompt",
444
- "model_b": "MagicBrush",
445
- "winner": "model_b",
446
- "judge": "arena_user_::1",
447
- "anony": true,
448
- "tstamp": 1707892689.4407
449
- },
450
- {
451
- "model_a": "MagicBrush",
452
- "model_b": "InstructPix2Pix",
453
- "winner": "model_b",
454
- "judge": "arena_user_::1",
455
- "anony": true,
456
- "tstamp": 1707908988.749
457
- },
458
- {
459
- "model_a": "Prompt2prompt",
460
- "model_b": "InstructPix2Pix",
461
- "winner": "model_a",
462
- "judge": "arena_user_::1",
463
- "anony": true,
464
- "tstamp": 1707912639.2701
465
- },
466
- {
467
- "model_a": "MagicBrush",
468
- "model_b": "Pix2PixZero",
469
- "winner": "model_a",
470
- "judge": "arena_user_::1",
471
- "anony": false,
472
- "tstamp": 1707917685.9574
473
- },
474
- {
475
- "model_a": "MagicBrush",
476
- "model_b": "InstructPix2Pix",
477
- "winner": "tie (bothbad)",
478
- "judge": "arena_user_::1",
479
- "anony": false,
480
- "tstamp": 1707919429.336
481
- },
482
- {
483
- "model_a": "InstructPix2Pix",
484
- "model_b": "CycleDiffusion",
485
- "winner": "model_a",
486
- "judge": "arena_user_::1",
487
- "anony": true,
488
- "tstamp": 1707932651.9192
489
- },
490
- {
491
- "model_a": "MagicBrush",
492
- "model_b": "InstructPix2Pix",
493
- "winner": "model_a",
494
- "judge": "arena_user_::1",
495
- "anony": true,
496
- "tstamp": 1707932749.3107
497
- },
498
- {
499
- "model_a": "Prompt2prompt",
500
- "model_b": "PNP",
501
- "winner": "model_a",
502
- "judge": "arena_user_::1",
503
- "anony": true,
504
- "tstamp": 1707933208.5797
505
- },
506
- {
507
- "model_a": "MagicBrush",
508
- "model_b": "Pix2PixZero",
509
- "winner": "model_a",
510
- "judge": "arena_user_::1",
511
- "anony": false,
512
- "tstamp": 1707945335.6341
513
- },
514
- {
515
- "model_a": "MagicBrush",
516
- "model_b": "PNP",
517
- "winner": "model_a",
518
- "judge": "arena_user_::1",
519
- "anony": false,
520
- "tstamp": 1708031168.6838
521
- },
522
- {
523
- "model_a": "Pix2PixZero",
524
- "model_b": "PNP",
525
- "winner": "model_b",
526
- "judge": "arena_user_::1",
527
- "anony": false,
528
- "tstamp": 1708038931.5388
529
- },
530
- {
531
- "model_a": "Pix2PixZero",
532
- "model_b": "CycleDiffusion",
533
- "winner": "tie (bothbad)",
534
- "judge": "arena_user_::1",
535
- "anony": true,
536
- "tstamp": 1708057382.78
537
- },
538
- {
539
- "model_a": "PNP",
540
- "model_b": "InstructPix2Pix",
541
- "winner": "model_b",
542
- "judge": "arena_user_::1",
543
- "anony": true,
544
- "tstamp": 1708093689.8237
545
- },
546
- {
547
- "model_a": "MagicBrush",
548
- "model_b": "PNP",
549
- "winner": "model_b",
550
- "judge": "arena_user_::1",
551
- "anony": true,
552
- "tstamp": 1708093910.4683
553
- },
554
- {
555
- "model_a": "Pix2PixZero",
556
- "model_b": "Prompt2prompt",
557
- "winner": "model_b",
558
- "judge": "arena_user_::1",
559
- "anony": false,
560
- "tstamp": 1708095090.8232
561
- },
562
- {
563
- "model_a": "Pix2PixZero",
564
- "model_b": "Prompt2prompt",
565
- "winner": "model_a",
566
- "judge": "arena_user_::1",
567
- "anony": false,
568
- "tstamp": 1708095305.4665
569
- },
570
- {
571
- "model_a": "InstructPix2Pix",
572
- "model_b": "Prompt2prompt",
573
- "winner": "model_b",
574
- "judge": "arena_user_::1",
575
- "anony": true,
576
- "tstamp": 1708140553.1694
577
- },
578
- {
579
- "model_a": "MagicBrush",
580
- "model_b": "Prompt2prompt",
581
- "winner": "model_a",
582
- "judge": "arena_user_::1",
583
- "anony": true,
584
- "tstamp": 1708145512.3656
585
- },
586
- {
587
- "model_a": "Pix2PixZero",
588
- "model_b": "Prompt2prompt",
589
- "winner": "tie (bothbad)",
590
- "judge": "arena_user_::1",
591
- "anony": true,
592
- "tstamp": 1708145724.4127
593
- },
594
- {
595
- "model_a": "Pix2PixZero",
596
- "model_b": "PNP",
597
- "winner": "model_b",
598
- "judge": "arena_user_::1",
599
- "anony": true,
600
- "tstamp": 1708146846.5098
601
- },
602
- {
603
- "model_a": "PNP",
604
- "model_b": "MagicBrush",
605
- "winner": "model_a",
606
- "judge": "arena_user_::1",
607
- "anony": true,
608
- "tstamp": 1708189738.4864
609
- },
610
- {
611
- "model_a": "Prompt2prompt",
612
- "model_b": "InstructPix2Pix",
613
- "winner": "model_b",
614
- "judge": "arena_user_::1",
615
- "anony": true,
616
- "tstamp": 1708235874.9246
617
- },
618
- {
619
- "model_a": "Pix2PixZero",
620
- "model_b": "PNP",
621
- "winner": "model_b",
622
- "judge": "arena_user_::1",
623
- "anony": false,
624
- "tstamp": 1708257619.7115
625
- },
626
- {
627
- "model_a": "MagicBrush",
628
- "model_b": "Pix2PixZero",
629
- "winner": "tie (bothbad)",
630
- "judge": "arena_user_::1",
631
- "anony": true,
632
- "tstamp": 1708341265.7655
633
- },
634
- {
635
- "model_a": "MagicBrush",
636
- "model_b": "InstructPix2Pix",
637
- "winner": "model_b",
638
- "judge": "arena_user_::1",
639
- "anony": true,
640
- "tstamp": 1708350183.3086
641
- },
642
- {
643
- "model_a": "MagicBrush",
644
- "model_b": "Pix2PixZero",
645
- "winner": "tie (bothbad)",
646
- "judge": "arena_user_::1",
647
- "anony": true,
648
- "tstamp": 1708399707.1681
649
- },
650
- {
651
- "model_a": "PNP",
652
- "model_b": "MagicBrush",
653
- "winner": "model_a",
654
- "judge": "arena_user_::1",
655
- "anony": true,
656
- "tstamp": 1708441502.4707
657
- },
658
- {
659
- "model_a": "InstructPix2Pix",
660
- "model_b": "MagicBrush",
661
- "winner": "model_a",
662
- "judge": "arena_user_::1",
663
- "anony": true,
664
- "tstamp": 1708441716.8195
665
- },
666
- {
667
- "model_a": "InstructPix2Pix",
668
- "model_b": "MagicBrush",
669
- "winner": "model_b",
670
- "judge": "arena_user_::1",
671
- "anony": false,
672
- "tstamp": 1708546759.2009
673
- },
674
- {
675
- "model_a": "InstructPix2Pix",
676
- "model_b": "MagicBrush",
677
- "winner": "model_a",
678
- "judge": "arena_user_::1",
679
- "anony": false,
680
- "tstamp": 1708546805.4892
681
- },
682
- {
683
- "model_a": "Pix2PixZero",
684
- "model_b": "CycleDiffusion",
685
- "winner": "tie (bothbad)",
686
- "judge": "arena_user_::1",
687
- "anony": true,
688
- "tstamp": 1708547082.7124
689
- },
690
- {
691
- "model_a": "InstructPix2Pix",
692
- "model_b": "MagicBrush",
693
- "winner": "model_b",
694
- "judge": "arena_user_::1",
695
- "anony": false,
696
- "tstamp": 1708547166.9685
697
- },
698
- {
699
- "model_a": "InstructPix2Pix",
700
- "model_b": "MagicBrush",
701
- "winner": "model_b",
702
- "judge": "arena_user_::1",
703
- "anony": false,
704
- "tstamp": 1708547293.7107
705
- },
706
- {
707
- "model_a": "CycleDiffusion",
708
- "model_b": "PNP",
709
- "winner": "tie (bothbad)",
710
- "judge": "arena_user_::1",
711
- "anony": true,
712
- "tstamp": 1708575046.0529
713
- },
714
- {
715
- "model_a": "CycleDiffusion",
716
- "model_b": "MagicBrush",
717
- "winner": "tie (bothbad)",
718
- "judge": "arena_user_::1",
719
- "anony": true,
720
- "tstamp": 1708615466.9264
721
- },
722
- {
723
- "model_a": "CycleDiffusion",
724
- "model_b": "MagicBrush",
725
- "winner": "model_b",
726
- "judge": "arena_user_::1",
727
- "anony": false,
728
- "tstamp": 1708615516.3341
729
- },
730
- {
731
- "model_a": "InstructPix2Pix",
732
- "model_b": "PNP",
733
- "winner": "model_b",
734
- "judge": "arena_user_::1",
735
- "anony": false,
736
- "tstamp": 1709205399.0098
737
- },
738
- {
739
- "model_a": "InstructPix2Pix",
740
- "model_b": "PNP",
741
- "winner": "model_b",
742
- "judge": "arena_user_::1",
743
- "anony": false,
744
- "tstamp": 1709205767.8923
745
- },
746
- {
747
- "model_a": "PNP",
748
- "model_b": "InstructPix2Pix",
749
- "winner": "model_b",
750
- "judge": "arena_user_::1",
751
- "anony": true,
752
- "tstamp": 1709443700.05
753
- },
754
- {
755
- "model_a": "MagicBrush",
756
- "model_b": "Pix2PixZero",
757
- "winner": "model_a",
758
- "judge": "arena_user_::1",
759
- "anony": true,
760
- "tstamp": 1709702898.9291
761
- },
762
- {
763
- "model_a": "CycleDiffusion",
764
- "model_b": "Prompt2prompt",
765
- "winner": "tie (bothbad)",
766
- "judge": "arena_user_::1",
767
- "anony": true,
768
- "tstamp": 1710091925.1861
769
- },
770
- {
771
- "model_a": "MagicBrush",
772
- "model_b": "InstructPix2Pix",
773
- "winner": "tie (bothbad)",
774
- "judge": "arena_user_::1",
775
- "anony": true,
776
- "tstamp": 1710517781.1525
777
- },
778
- {
779
- "model_a": "MagicBrush",
780
- "model_b": "InstructPix2Pix",
781
- "winner": "tie (bothbad)",
782
- "judge": "arena_user_::1",
783
- "anony": false,
784
- "tstamp": 1710517859.2942
785
- },
786
- {
787
- "model_a": "Pix2PixZero",
788
- "model_b": "CycleDiffusion",
789
- "winner": "tie (bothbad)",
790
- "judge": "arena_user_::1",
791
- "anony": true,
792
- "tstamp": 1710535672.9791
793
- },
794
- {
795
- "model_a": "CycleDiffusion",
796
- "model_b": "Pix2PixZero",
797
- "winner": "model_b",
798
- "judge": "arena_user_10.16.25.191",
799
- "anony": true,
800
- "tstamp": 1711610477.1213
801
- },
802
- {
803
- "model_a": "CycleDiffusion",
804
- "model_b": "Pix2PixZero",
805
- "winner": "model_b",
806
- "judge": "arena_user_10.16.7.189",
807
- "anony": true,
808
- "tstamp": 1711629129.3894
809
- },
810
- {
811
- "model_a": "InstructPix2Pix",
812
- "model_b": "CycleDiffusion",
813
- "winner": "model_b",
814
- "judge": "arena_user_10.16.7.189",
815
- "anony": true,
816
- "tstamp": 1711629705.2246
817
- },
818
- {
819
- "model_a": "CycleDiffusion",
820
- "model_b": "Pix2PixZero",
821
- "winner": "model_b",
822
- "judge": "arena_user_10.16.25.191",
823
- "anony": true,
824
- "tstamp": 1711630362.5575
825
- },
826
- {
827
- "model_a": "MagicBrush",
828
- "model_b": "SDEdit",
829
- "winner": "model_a",
830
- "judge": "arena_user_127.0.0.1",
831
- "anony": true,
832
- "tstamp": 1711631112.5207
833
- },
834
- {
835
- "model_a": "Pix2PixZero",
836
- "model_b": "Prompt2prompt",
837
- "winner": "model_a",
838
- "judge": "arena_user_10.16.41.118",
839
- "anony": true,
840
- "tstamp": 1711631690.5127
841
- },
842
- {
843
- "model_a": "MagicBrush",
844
- "model_b": "InstructPix2Pix",
845
- "winner": "model_a",
846
- "judge": "arena_user_127.0.0.1",
847
- "anony": true,
848
- "tstamp": 1711633200.2923
849
- },
850
- {
851
- "model_a": "MagicBrush",
852
- "model_b": "InstructPix2Pix",
853
- "winner": "model_a",
854
- "judge": "arena_user_127.0.0.1",
855
- "anony": true,
856
- "tstamp": 1711633594.9922
857
- },
858
- {
859
- "model_a": "MagicBrush",
860
- "model_b": "SDEdit",
861
- "winner": "model_a",
862
- "judge": "arena_user_10.16.7.189",
863
- "anony": true,
864
- "tstamp": 1711635443.3071
865
- },
866
- {
867
- "model_a": "CycleDiffusion",
868
- "model_b": "MagicBrush",
869
- "winner": "model_b",
870
- "judge": "arena_user_10.16.25.191",
871
- "anony": true,
872
- "tstamp": 1711635899.3088
873
- },
874
- {
875
- "model_a": "SDEdit",
876
- "model_b": "MagicBrush",
877
- "winner": "model_b",
878
- "judge": "arena_user_10.16.41.118",
879
- "anony": true,
880
- "tstamp": 1711639015.428
881
- },
882
- {
883
- "model_a": "InstructPix2Pix",
884
- "model_b": "MagicBrush",
885
- "winner": "model_b",
886
- "judge": "arena_user_10.16.7.189",
887
- "anony": true,
888
- "tstamp": 1711646372.1201
889
- },
890
- {
891
- "model_a": "Pix2PixZero",
892
- "model_b": "Prompt2prompt",
893
- "winner": "model_b",
894
- "judge": "arena_user_10.16.17.217",
895
- "anony": true,
896
- "tstamp": 1712873850.0636
897
- },
898
- {
899
- "model_a": "MagicBrush",
900
- "model_b": "SDEdit",
901
- "winner": "tie (bothbad)",
902
- "judge": "arena_user_10.16.25.191",
903
- "anony": true,
904
- "tstamp": 1712876598.7667
905
- }
906
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/results/20240411/clean_battle_t2i_generation.json DELETED
The diff for this file is too large to render. See raw diff
 
arena_elo/results/20240411/elo_results_image_editing.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d66a54af51d2ecf89f461dbb4e15090d084638596952d3541ce369798a525ff3
3
- size 57096
 
 
 
 
arena_elo/results/20240411/elo_results_t2i_generation.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:caf98f560387fa9d6b8c233e9915807adad62315cfdd6d4a5e7c9fda30140eb8
3
- size 62422
 
 
 
 
arena_elo/results/20240411/image_editing_leaderboard.csv DELETED
@@ -1,8 +0,0 @@
1
- key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
2
- Prompt2prompt,Prompt2prompt,1188.219371435949,1160.9021011448333,Apache-2.0,"Google, Tel Aviv University",https://prompt-to-prompt.github.io
3
- PNP,PNP,1133.8594830307645,1160.2784411172045,-,Weizmann Institute of Science,https://github.com/MichalGeyer/plug-and-play
4
- InstructPix2Pix,InstructPix2Pix,1086.6617653998492,1065.4343032662,"Copyright 2023 Timothy Brooks, Aleksander Holynski, Alexei A. Efros","University of California, Berkeley",https://www.timothybrooks.com/instruct-pix2pix
5
- MagicBrush,MagicBrush,1084.8708678670623,1120.3917913590851,CC-BY-4.0,"The Ohio State University, University of Waterloo",https://osu-nlp-group.github.io/MagicBrush
6
- Pix2PixZero,Pix2PixZero,983.9050014855375,949.5286840298457,MIT License,"Carnegie Mellon University, Adobe Research",https://pix2pixzero.github.io
7
- CycleDiffusion,CycleDiffusion,847.634435323394,811.6166545238106,X11,Carnegie Mellon University,https://github.com/ChenWu98/cycle-diffusion
8
- SDEdit,SDEdit,674.8490754574439,731.8480245590208,MIT License,Stanford University,https://sde-image-editing.github.io
 
 
 
 
 
 
 
 
 
arena_elo/results/20240411/t2i_generation_leaderboard.csv DELETED
@@ -1,10 +0,0 @@
1
- key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
2
- PlayGround V2,PlayGround V2,1096.7894880225679,1099.8051043857877,Playground v2 Community License,Playground,https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic
3
- PlayGround V2.5,PlayGround V2.5,1087.8676967844767,1102.012177335679,Playground v2.5 Community License,Playground,https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic
4
- StableCascade,StableCascade,1055.9173326915914,1059.3764815279687,stable-cascade-nc-community (other),Stability AI,https://huggingface.co/stabilityai/stable-cascade
5
- PixArtAlpha,PixArtAlpha,1033.9990481857885,1022.7034421485712,openrail++,PixArt-alpha,https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS
6
- SDXLLightning,SDXLLightning,1033.7993884424232,1038.4887196068619,openrail++,ByteDance,https://huggingface.co/ByteDance/SDXL-Lightning
7
- SDXL,SDXL,1001.9345229118052,1000.9893451213411,openrail++,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
8
- SDXLTurbo,SDXLTurbo,954.8868434684313,951.3491425503697,sai-nc-community (other),Stability AI,https://huggingface.co/stabilityai/sdxl-turbo
9
- OpenJourney,OpenJourney,888.3709717134242,873.7483257587076,creativeml-openrail-m,PromptHero,https://huggingface.co/prompthero/openjourney
10
- LCM,LCM,846.4347077794937,852.2372365264126,MIT License,Tsinghua University,https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7
 
 
 
 
 
 
 
 
 
 
 
arena_elo/results/20240428/elo_results_image_editing.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1b4f1daab3429c7656eb8b3b2128a127480fa8212b17a1a98207884d7ce7a9f
3
- size 58442
 
 
 
 
arena_elo/results/20240428/image_editing_leaderboard.csv DELETED
@@ -1,8 +0,0 @@
1
- key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
2
- Prompt2prompt,Prompt2prompt,1224.5951620965877,1133.887231157847,Apache-2.0,"Google, Tel Aviv University",https://prompt-to-prompt.github.io
3
- InstructPix2Pix,InstructPix2Pix,1162.3591990023222,1059.7394666236296,"Copyright 2023 Timothy Brooks, Aleksander Holynski, Alexei A. Efros","University of California, Berkeley",https://www.timothybrooks.com/instruct-pix2pix
4
- PNP,PNP,1142.872221219748,1117.461082043853,-,Weizmann Institute of Science,https://github.com/MichalGeyer/plug-and-play
5
- MagicBrush,MagicBrush,1053.6353139288728,1055.6074426532264,CC-BY-4.0,"The Ohio State University, University of Waterloo",https://osu-nlp-group.github.io/MagicBrush
6
- Pix2PixZero,Pix2PixZero,918.4266240422415,853.535635519584,MIT License,"Carnegie Mellon University, Adobe Research",https://pix2pixzero.github.io
7
- CycleDiffusion,CycleDiffusion,865.2495984976465,775.6226309361784,X11,Carnegie Mellon University,https://github.com/ChenWu98/cycle-diffusion
8
- SDEdit,SDEdit,632.8618812125814,680.2047869803968,MIT License,Stanford University,https://sde-image-editing.github.io
 
 
 
 
 
 
 
 
 
arena_elo/results/20240501/clean_battle_t2i_generation.json DELETED
The diff for this file is too large to render. See raw diff
 
arena_elo/results/20240501/elo_results_t2i_generation.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b79d128ba01570bc59c5f48e1c0640f2541817ce1a77abb3e16131884288b1a
3
- size 65313
 
 
 
 
arena_elo/results/20240501/t2i_generation_leaderboard.csv DELETED
@@ -1,11 +0,0 @@
1
- key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
2
- PlayGround V2.5,PlayGround V2.5,1157.785440865029,1197.7936802344343,Playground v2.5 Community License,Playground,https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic
3
- StableCascade,StableCascade,1116.6696847615349,1116.9442071854512,stable-cascade-nc-community (other),Stability AI,https://huggingface.co/stabilityai/stable-cascade
4
- PlayGround V2,PlayGround V2,1110.1291971452683,1120.6591618464581,Playground v2 Community License,Playground,https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic
5
- PixArtAlpha,PixArtAlpha,1042.1316579959862,1040.3305680293547,openrail++,PixArt-alpha,https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS
6
- SDXLLightning,SDXLLightning,1036.0784815928241,1056.600050803737,openrail++,ByteDance,https://huggingface.co/ByteDance/SDXL-Lightning
7
- SDXL,SDXL,987.5686859787551,1003.0595102032345,openrail++,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
8
- PixArtSigma,PixArtSigma,948.0067582557859,961.4040676622378,N/A,N/A,N/A
9
- SDXLTurbo,SDXLTurbo,931.094996526404,945.5610964234802,sai-nc-community (other),Stability AI,https://huggingface.co/stabilityai/sdxl-turbo
10
- OpenJourney,OpenJourney,855.7449360962327,860.1159058283633,creativeml-openrail-m,PromptHero,https://huggingface.co/prompthero/openjourney
11
- LCM,LCM,814.7901607821794,840.5627577743975,MIT License,Tsinghua University,https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/results/20240516/clean_battle_image_editing.json DELETED
@@ -1,1578 +0,0 @@
1
- [
2
- {
3
- "model_a": "CycleDiffusion",
4
- "model_b": "InstructPix2Pix",
5
- "winner": "model_b",
6
- "judge": "arena_user_::1",
7
- "anony": true,
8
- "tstamp": 1707712630.872
9
- },
10
- {
11
- "model_a": "CycleDiffusion",
12
- "model_b": "InstructPix2Pix",
13
- "winner": "model_b",
14
- "judge": "arena_user_::1",
15
- "anony": false,
16
- "tstamp": 1707712699.668
17
- },
18
- {
19
- "model_a": "Pix2PixZero",
20
- "model_b": "MagicBrush",
21
- "winner": "model_a",
22
- "judge": "arena_user_::1",
23
- "anony": true,
24
- "tstamp": 1707712896.0427
25
- },
26
- {
27
- "model_a": "CycleDiffusion",
28
- "model_b": "InstructPix2Pix",
29
- "winner": "model_b",
30
- "judge": "arena_user_::1",
31
- "anony": false,
32
- "tstamp": 1707712929.7061
33
- },
34
- {
35
- "model_a": "CycleDiffusion",
36
- "model_b": "InstructPix2Pix",
37
- "winner": "model_b",
38
- "judge": "arena_user_::1",
39
- "anony": true,
40
- "tstamp": 1707713147.0445
41
- },
42
- {
43
- "model_a": "CycleDiffusion",
44
- "model_b": "PNP",
45
- "winner": "model_b",
46
- "judge": "arena_user_::1",
47
- "anony": true,
48
- "tstamp": 1707713198.9284
49
- },
50
- {
51
- "model_a": "CycleDiffusion",
52
- "model_b": "Prompt2prompt",
53
- "winner": "model_b",
54
- "judge": "arena_user_::1",
55
- "anony": true,
56
- "tstamp": 1707713210.1306
57
- },
58
- {
59
- "model_a": "Prompt2prompt",
60
- "model_b": "SDEdit",
61
- "winner": "model_a",
62
- "judge": "arena_user_::1",
63
- "anony": true,
64
- "tstamp": 1707713747.5115
65
- },
66
- {
67
- "model_a": "PNP",
68
- "model_b": "Pix2PixZero",
69
- "winner": "model_a",
70
- "judge": "arena_user_::1",
71
- "anony": true,
72
- "tstamp": 1707715613.7226
73
- },
74
- {
75
- "model_a": "CycleDiffusion",
76
- "model_b": "MagicBrush",
77
- "winner": "model_b",
78
- "judge": "arena_user_::1",
79
- "anony": true,
80
- "tstamp": 1707765708.2644
81
- },
82
- {
83
- "model_a": "PNP",
84
- "model_b": "CycleDiffusion",
85
- "winner": "model_a",
86
- "judge": "arena_user_::1",
87
- "anony": true,
88
- "tstamp": 1707765861.2742
89
- },
90
- {
91
- "model_a": "PNP",
92
- "model_b": "CycleDiffusion",
93
- "winner": "model_a",
94
- "judge": "arena_user_::1",
95
- "anony": false,
96
- "tstamp": 1707765975.0206
97
- },
98
- {
99
- "model_a": "PNP",
100
- "model_b": "CycleDiffusion",
101
- "winner": "model_a",
102
- "judge": "arena_user_::1",
103
- "anony": true,
104
- "tstamp": 1707768866.9065
105
- },
106
- {
107
- "model_a": "SDEdit",
108
- "model_b": "MagicBrush",
109
- "winner": "model_b",
110
- "judge": "arena_user_::1",
111
- "anony": true,
112
- "tstamp": 1707771673.2989
113
- },
114
- {
115
- "model_a": "SDEdit",
116
- "model_b": "MagicBrush",
117
- "winner": "model_b",
118
- "judge": "arena_user_::1",
119
- "anony": true,
120
- "tstamp": 1707784377.6617
121
- },
122
- {
123
- "model_a": "SDEdit",
124
- "model_b": "MagicBrush",
125
- "winner": "model_b",
126
- "judge": "arena_user_::1",
127
- "anony": true,
128
- "tstamp": 1707784466.8915
129
- },
130
- {
131
- "model_a": "CycleDiffusion",
132
- "model_b": "PNP",
133
- "winner": "model_b",
134
- "judge": "arena_user_::1",
135
- "anony": true,
136
- "tstamp": 1707784983.9581
137
- },
138
- {
139
- "model_a": "MagicBrush",
140
- "model_b": "SDEdit",
141
- "winner": "model_a",
142
- "judge": "arena_user_::1",
143
- "anony": true,
144
- "tstamp": 1707785277.16
145
- },
146
- {
147
- "model_a": "MagicBrush",
148
- "model_b": "SDEdit",
149
- "winner": "model_a",
150
- "judge": "arena_user_::1",
151
- "anony": true,
152
- "tstamp": 1707795299.0619
153
- },
154
- {
155
- "model_a": "MagicBrush",
156
- "model_b": "SDEdit",
157
- "winner": "tie (bothbad)",
158
- "judge": "arena_user_::1",
159
- "anony": true,
160
- "tstamp": 1707795798.752
161
- },
162
- {
163
- "model_a": "SDEdit",
164
- "model_b": "Prompt2prompt",
165
- "winner": "model_b",
166
- "judge": "arena_user_::1",
167
- "anony": false,
168
- "tstamp": 1707796435.7996
169
- },
170
- {
171
- "model_a": "SDEdit",
172
- "model_b": "CycleDiffusion",
173
- "winner": "model_b",
174
- "judge": "arena_user_::1",
175
- "anony": false,
176
- "tstamp": 1707797278.7369
177
- },
178
- {
179
- "model_a": "SDEdit",
180
- "model_b": "CycleDiffusion",
181
- "winner": "model_a",
182
- "judge": "arena_user_::1",
183
- "anony": false,
184
- "tstamp": 1707797279.6004
185
- },
186
- {
187
- "model_a": "SDEdit",
188
- "model_b": "Prompt2prompt",
189
- "winner": "model_b",
190
- "judge": "arena_user_::1",
191
- "anony": true,
192
- "tstamp": 1707805086.9739
193
- },
194
- {
195
- "model_a": "PNP",
196
- "model_b": "SDEdit",
197
- "winner": "model_a",
198
- "judge": "arena_user_::1",
199
- "anony": true,
200
- "tstamp": 1707805220.3253
201
- },
202
- {
203
- "model_a": "InstructPix2Pix",
204
- "model_b": "CycleDiffusion",
205
- "winner": "tie (bothbad)",
206
- "judge": "arena_user_::1",
207
- "anony": true,
208
- "tstamp": 1707805332.6322
209
- },
210
- {
211
- "model_a": "InstructPix2Pix",
212
- "model_b": "Prompt2prompt",
213
- "winner": "model_b",
214
- "judge": "arena_user_::1",
215
- "anony": true,
216
- "tstamp": 1707805476.0509
217
- },
218
- {
219
- "model_a": "InstructPix2Pix",
220
- "model_b": "Prompt2prompt",
221
- "winner": "model_b",
222
- "judge": "arena_user_::1",
223
- "anony": true,
224
- "tstamp": 1707818374.3438
225
- },
226
- {
227
- "model_a": "PNP",
228
- "model_b": "Prompt2prompt",
229
- "winner": "model_b",
230
- "judge": "arena_user_::1",
231
- "anony": true,
232
- "tstamp": 1707834631.9088
233
- },
234
- {
235
- "model_a": "InstructPix2Pix",
236
- "model_b": "SDEdit",
237
- "winner": "model_a",
238
- "judge": "arena_user_::1",
239
- "anony": true,
240
- "tstamp": 1707834954.0147
241
- },
242
- {
243
- "model_a": "Prompt2prompt",
244
- "model_b": "Pix2PixZero",
245
- "winner": "tie (bothbad)",
246
- "judge": "arena_user_::1",
247
- "anony": true,
248
- "tstamp": 1707835366.544
249
- },
250
- {
251
- "model_a": "PNP",
252
- "model_b": "SDEdit",
253
- "winner": "model_a",
254
- "judge": "arena_user_::1",
255
- "anony": true,
256
- "tstamp": 1707835643.6178
257
- },
258
- {
259
- "model_a": "MagicBrush",
260
- "model_b": "InstructPix2Pix",
261
- "winner": "tie (bothbad)",
262
- "judge": "arena_user_::1",
263
- "anony": true,
264
- "tstamp": 1707835789.25
265
- },
266
- {
267
- "model_a": "MagicBrush",
268
- "model_b": "PNP",
269
- "winner": "tie (bothbad)",
270
- "judge": "arena_user_::1",
271
- "anony": true,
272
- "tstamp": 1707836852.671
273
- },
274
- {
275
- "model_a": "MagicBrush",
276
- "model_b": "InstructPix2Pix",
277
- "winner": "model_a",
278
- "judge": "arena_user_::1",
279
- "anony": false,
280
- "tstamp": 1707836952.6082
281
- },
282
- {
283
- "model_a": "CycleDiffusion",
284
- "model_b": "SDEdit",
285
- "winner": "tie (bothbad)",
286
- "judge": "arena_user_::1",
287
- "anony": false,
288
- "tstamp": 1707837020.7148
289
- },
290
- {
291
- "model_a": "InstructPix2Pix",
292
- "model_b": "PNP",
293
- "winner": "model_a",
294
- "judge": "arena_user_::1",
295
- "anony": true,
296
- "tstamp": 1707837226.2259
297
- },
298
- {
299
- "model_a": "Prompt2prompt",
300
- "model_b": "Pix2PixZero",
301
- "winner": "model_a",
302
- "judge": "arena_user_::1",
303
- "anony": true,
304
- "tstamp": 1707838166.1449
305
- },
306
- {
307
- "model_a": "InstructPix2Pix",
308
- "model_b": "MagicBrush",
309
- "winner": "tie (bothbad)",
310
- "judge": "arena_user_::1",
311
- "anony": true,
312
- "tstamp": 1707838405.0013
313
- },
314
- {
315
- "model_a": "MagicBrush",
316
- "model_b": "CycleDiffusion",
317
- "winner": "model_a",
318
- "judge": "arena_user_::1",
319
- "anony": true,
320
- "tstamp": 1707839133.3126
321
- },
322
- {
323
- "model_a": "Prompt2prompt",
324
- "model_b": "InstructPix2Pix",
325
- "winner": "model_a",
326
- "judge": "arena_user_::1",
327
- "anony": true,
328
- "tstamp": 1707839484.6824
329
- },
330
- {
331
- "model_a": "PNP",
332
- "model_b": "InstructPix2Pix",
333
- "winner": "tie (bothbad)",
334
- "judge": "arena_user_::1",
335
- "anony": true,
336
- "tstamp": 1707850104.2499
337
- },
338
- {
339
- "model_a": "InstructPix2Pix",
340
- "model_b": "Pix2PixZero",
341
- "winner": "model_a",
342
- "judge": "arena_user_::1",
343
- "anony": true,
344
- "tstamp": 1707851384.7689
345
- },
346
- {
347
- "model_a": "PNP",
348
- "model_b": "MagicBrush",
349
- "winner": "model_b",
350
- "judge": "arena_user_::1",
351
- "anony": true,
352
- "tstamp": 1707851936.9466
353
- },
354
- {
355
- "model_a": "CycleDiffusion",
356
- "model_b": "MagicBrush",
357
- "winner": "tie (bothbad)",
358
- "judge": "arena_user_::1",
359
- "anony": true,
360
- "tstamp": 1707852836.3291
361
- },
362
- {
363
- "model_a": "CycleDiffusion",
364
- "model_b": "MagicBrush",
365
- "winner": "tie (bothbad)",
366
- "judge": "arena_user_::1",
367
- "anony": false,
368
- "tstamp": 1707852878.673
369
- },
370
- {
371
- "model_a": "Prompt2prompt",
372
- "model_b": "InstructPix2Pix",
373
- "winner": "model_a",
374
- "judge": "arena_user_::1",
375
- "anony": true,
376
- "tstamp": 1707853008.1359
377
- },
378
- {
379
- "model_a": "InstructPix2Pix",
380
- "model_b": "Pix2PixZero",
381
- "winner": "model_a",
382
- "judge": "arena_user_::1",
383
- "anony": false,
384
- "tstamp": 1707856807.6229
385
- },
386
- {
387
- "model_a": "MagicBrush",
388
- "model_b": "Pix2PixZero",
389
- "winner": "tie (bothbad)",
390
- "judge": "arena_user_::1",
391
- "anony": false,
392
- "tstamp": 1707863740.3507
393
- },
394
- {
395
- "model_a": "MagicBrush",
396
- "model_b": "PNP",
397
- "winner": "model_b",
398
- "judge": "arena_user_::1",
399
- "anony": true,
400
- "tstamp": 1707866312.1118
401
- },
402
- {
403
- "model_a": "Pix2PixZero",
404
- "model_b": "Prompt2prompt",
405
- "winner": "model_b",
406
- "judge": "arena_user_::1",
407
- "anony": true,
408
- "tstamp": 1707883083.3533
409
- },
410
- {
411
- "model_a": "Pix2PixZero",
412
- "model_b": "InstructPix2Pix",
413
- "winner": "model_b",
414
- "judge": "arena_user_::1",
415
- "anony": true,
416
- "tstamp": 1707883181.1397
417
- },
418
- {
419
- "model_a": "Pix2PixZero",
420
- "model_b": "Prompt2prompt",
421
- "winner": "model_b",
422
- "judge": "arena_user_::1",
423
- "anony": true,
424
- "tstamp": 1707883187.9173
425
- },
426
- {
427
- "model_a": "PNP",
428
- "model_b": "Prompt2prompt",
429
- "winner": "model_a",
430
- "judge": "arena_user_::1",
431
- "anony": true,
432
- "tstamp": 1707883507.587
433
- },
434
- {
435
- "model_a": "Prompt2prompt",
436
- "model_b": "CycleDiffusion",
437
- "winner": "model_a",
438
- "judge": "arena_user_::1",
439
- "anony": true,
440
- "tstamp": 1707883939.6125
441
- },
442
- {
443
- "model_a": "Prompt2prompt",
444
- "model_b": "MagicBrush",
445
- "winner": "model_b",
446
- "judge": "arena_user_::1",
447
- "anony": true,
448
- "tstamp": 1707892689.4407
449
- },
450
- {
451
- "model_a": "MagicBrush",
452
- "model_b": "InstructPix2Pix",
453
- "winner": "model_b",
454
- "judge": "arena_user_::1",
455
- "anony": true,
456
- "tstamp": 1707908988.749
457
- },
458
- {
459
- "model_a": "Prompt2prompt",
460
- "model_b": "InstructPix2Pix",
461
- "winner": "model_a",
462
- "judge": "arena_user_::1",
463
- "anony": true,
464
- "tstamp": 1707912639.2701
465
- },
466
- {
467
- "model_a": "MagicBrush",
468
- "model_b": "Pix2PixZero",
469
- "winner": "model_a",
470
- "judge": "arena_user_::1",
471
- "anony": false,
472
- "tstamp": 1707917685.9574
473
- },
474
- {
475
- "model_a": "MagicBrush",
476
- "model_b": "InstructPix2Pix",
477
- "winner": "tie (bothbad)",
478
- "judge": "arena_user_::1",
479
- "anony": false,
480
- "tstamp": 1707919429.336
481
- },
482
- {
483
- "model_a": "InstructPix2Pix",
484
- "model_b": "CycleDiffusion",
485
- "winner": "model_a",
486
- "judge": "arena_user_::1",
487
- "anony": true,
488
- "tstamp": 1707932651.9192
489
- },
490
- {
491
- "model_a": "MagicBrush",
492
- "model_b": "InstructPix2Pix",
493
- "winner": "model_a",
494
- "judge": "arena_user_::1",
495
- "anony": true,
496
- "tstamp": 1707932749.3107
497
- },
498
- {
499
- "model_a": "Prompt2prompt",
500
- "model_b": "PNP",
501
- "winner": "model_a",
502
- "judge": "arena_user_::1",
503
- "anony": true,
504
- "tstamp": 1707933208.5797
505
- },
506
- {
507
- "model_a": "MagicBrush",
508
- "model_b": "Pix2PixZero",
509
- "winner": "model_a",
510
- "judge": "arena_user_::1",
511
- "anony": false,
512
- "tstamp": 1707945335.6341
513
- },
514
- {
515
- "model_a": "MagicBrush",
516
- "model_b": "PNP",
517
- "winner": "model_a",
518
- "judge": "arena_user_::1",
519
- "anony": false,
520
- "tstamp": 1708031168.6838
521
- },
522
- {
523
- "model_a": "Pix2PixZero",
524
- "model_b": "PNP",
525
- "winner": "model_b",
526
- "judge": "arena_user_::1",
527
- "anony": false,
528
- "tstamp": 1708038931.5388
529
- },
530
- {
531
- "model_a": "Pix2PixZero",
532
- "model_b": "CycleDiffusion",
533
- "winner": "tie (bothbad)",
534
- "judge": "arena_user_::1",
535
- "anony": true,
536
- "tstamp": 1708057382.78
537
- },
538
- {
539
- "model_a": "PNP",
540
- "model_b": "InstructPix2Pix",
541
- "winner": "model_b",
542
- "judge": "arena_user_::1",
543
- "anony": true,
544
- "tstamp": 1708093689.8237
545
- },
546
- {
547
- "model_a": "MagicBrush",
548
- "model_b": "PNP",
549
- "winner": "model_b",
550
- "judge": "arena_user_::1",
551
- "anony": true,
552
- "tstamp": 1708093910.4683
553
- },
554
- {
555
- "model_a": "Pix2PixZero",
556
- "model_b": "Prompt2prompt",
557
- "winner": "model_b",
558
- "judge": "arena_user_::1",
559
- "anony": false,
560
- "tstamp": 1708095090.8232
561
- },
562
- {
563
- "model_a": "Pix2PixZero",
564
- "model_b": "Prompt2prompt",
565
- "winner": "model_a",
566
- "judge": "arena_user_::1",
567
- "anony": false,
568
- "tstamp": 1708095305.4665
569
- },
570
- {
571
- "model_a": "InstructPix2Pix",
572
- "model_b": "Prompt2prompt",
573
- "winner": "model_b",
574
- "judge": "arena_user_::1",
575
- "anony": true,
576
- "tstamp": 1708140553.1694
577
- },
578
- {
579
- "model_a": "MagicBrush",
580
- "model_b": "Prompt2prompt",
581
- "winner": "model_a",
582
- "judge": "arena_user_::1",
583
- "anony": true,
584
- "tstamp": 1708145512.3656
585
- },
586
- {
587
- "model_a": "Pix2PixZero",
588
- "model_b": "Prompt2prompt",
589
- "winner": "tie (bothbad)",
590
- "judge": "arena_user_::1",
591
- "anony": true,
592
- "tstamp": 1708145724.4127
593
- },
594
- {
595
- "model_a": "Pix2PixZero",
596
- "model_b": "PNP",
597
- "winner": "model_b",
598
- "judge": "arena_user_::1",
599
- "anony": true,
600
- "tstamp": 1708146846.5098
601
- },
602
- {
603
- "model_a": "PNP",
604
- "model_b": "MagicBrush",
605
- "winner": "model_a",
606
- "judge": "arena_user_::1",
607
- "anony": true,
608
- "tstamp": 1708189738.4864
609
- },
610
- {
611
- "model_a": "Prompt2prompt",
612
- "model_b": "InstructPix2Pix",
613
- "winner": "model_b",
614
- "judge": "arena_user_::1",
615
- "anony": true,
616
- "tstamp": 1708235874.9246
617
- },
618
- {
619
- "model_a": "Pix2PixZero",
620
- "model_b": "PNP",
621
- "winner": "model_b",
622
- "judge": "arena_user_::1",
623
- "anony": false,
624
- "tstamp": 1708257619.7115
625
- },
626
- {
627
- "model_a": "MagicBrush",
628
- "model_b": "Pix2PixZero",
629
- "winner": "tie (bothbad)",
630
- "judge": "arena_user_::1",
631
- "anony": true,
632
- "tstamp": 1708341265.7655
633
- },
634
- {
635
- "model_a": "MagicBrush",
636
- "model_b": "InstructPix2Pix",
637
- "winner": "model_b",
638
- "judge": "arena_user_::1",
639
- "anony": true,
640
- "tstamp": 1708350183.3086
641
- },
642
- {
643
- "model_a": "MagicBrush",
644
- "model_b": "Pix2PixZero",
645
- "winner": "tie (bothbad)",
646
- "judge": "arena_user_::1",
647
- "anony": true,
648
- "tstamp": 1708399707.1681
649
- },
650
- {
651
- "model_a": "PNP",
652
- "model_b": "MagicBrush",
653
- "winner": "model_a",
654
- "judge": "arena_user_::1",
655
- "anony": true,
656
- "tstamp": 1708441502.4707
657
- },
658
- {
659
- "model_a": "InstructPix2Pix",
660
- "model_b": "MagicBrush",
661
- "winner": "model_a",
662
- "judge": "arena_user_::1",
663
- "anony": true,
664
- "tstamp": 1708441716.8195
665
- },
666
- {
667
- "model_a": "InstructPix2Pix",
668
- "model_b": "MagicBrush",
669
- "winner": "model_b",
670
- "judge": "arena_user_::1",
671
- "anony": false,
672
- "tstamp": 1708546759.2009
673
- },
674
- {
675
- "model_a": "InstructPix2Pix",
676
- "model_b": "MagicBrush",
677
- "winner": "model_a",
678
- "judge": "arena_user_::1",
679
- "anony": false,
680
- "tstamp": 1708546805.4892
681
- },
682
- {
683
- "model_a": "Pix2PixZero",
684
- "model_b": "CycleDiffusion",
685
- "winner": "tie (bothbad)",
686
- "judge": "arena_user_::1",
687
- "anony": true,
688
- "tstamp": 1708547082.7124
689
- },
690
- {
691
- "model_a": "InstructPix2Pix",
692
- "model_b": "MagicBrush",
693
- "winner": "model_b",
694
- "judge": "arena_user_::1",
695
- "anony": false,
696
- "tstamp": 1708547166.9685
697
- },
698
- {
699
- "model_a": "InstructPix2Pix",
700
- "model_b": "MagicBrush",
701
- "winner": "model_b",
702
- "judge": "arena_user_::1",
703
- "anony": false,
704
- "tstamp": 1708547293.7107
705
- },
706
- {
707
- "model_a": "CycleDiffusion",
708
- "model_b": "PNP",
709
- "winner": "tie (bothbad)",
710
- "judge": "arena_user_::1",
711
- "anony": true,
712
- "tstamp": 1708575046.0529
713
- },
714
- {
715
- "model_a": "CycleDiffusion",
716
- "model_b": "MagicBrush",
717
- "winner": "tie (bothbad)",
718
- "judge": "arena_user_::1",
719
- "anony": true,
720
- "tstamp": 1708615466.9264
721
- },
722
- {
723
- "model_a": "CycleDiffusion",
724
- "model_b": "MagicBrush",
725
- "winner": "model_b",
726
- "judge": "arena_user_::1",
727
- "anony": false,
728
- "tstamp": 1708615516.3341
729
- },
730
- {
731
- "model_a": "InstructPix2Pix",
732
- "model_b": "PNP",
733
- "winner": "model_b",
734
- "judge": "arena_user_::1",
735
- "anony": false,
736
- "tstamp": 1709205399.0098
737
- },
738
- {
739
- "model_a": "InstructPix2Pix",
740
- "model_b": "PNP",
741
- "winner": "model_b",
742
- "judge": "arena_user_::1",
743
- "anony": false,
744
- "tstamp": 1709205767.8923
745
- },
746
- {
747
- "model_a": "PNP",
748
- "model_b": "InstructPix2Pix",
749
- "winner": "model_b",
750
- "judge": "arena_user_::1",
751
- "anony": true,
752
- "tstamp": 1709443700.05
753
- },
754
- {
755
- "model_a": "MagicBrush",
756
- "model_b": "Pix2PixZero",
757
- "winner": "model_a",
758
- "judge": "arena_user_::1",
759
- "anony": true,
760
- "tstamp": 1709702898.9291
761
- },
762
- {
763
- "model_a": "CycleDiffusion",
764
- "model_b": "Prompt2prompt",
765
- "winner": "tie (bothbad)",
766
- "judge": "arena_user_::1",
767
- "anony": true,
768
- "tstamp": 1710091925.1861
769
- },
770
- {
771
- "model_a": "MagicBrush",
772
- "model_b": "InstructPix2Pix",
773
- "winner": "tie (bothbad)",
774
- "judge": "arena_user_::1",
775
- "anony": true,
776
- "tstamp": 1710517781.1525
777
- },
778
- {
779
- "model_a": "MagicBrush",
780
- "model_b": "InstructPix2Pix",
781
- "winner": "tie (bothbad)",
782
- "judge": "arena_user_::1",
783
- "anony": false,
784
- "tstamp": 1710517859.2942
785
- },
786
- {
787
- "model_a": "Pix2PixZero",
788
- "model_b": "CycleDiffusion",
789
- "winner": "tie (bothbad)",
790
- "judge": "arena_user_::1",
791
- "anony": true,
792
- "tstamp": 1710535672.9791
793
- },
794
- {
795
- "model_a": "InfEdit",
796
- "model_b": "MagicBrush",
797
- "winner": "model_a",
798
- "judge": "arena_user_10.16.25.191",
799
- "anony": false,
800
- "tstamp": 1714359818.6646
801
- },
802
- {
803
- "model_a": "InstructPix2Pix",
804
- "model_b": "Prompt2prompt",
805
- "winner": "tie (bothbad)",
806
- "judge": "arena_user_10.16.25.191",
807
- "anony": true,
808
- "tstamp": 1714363016.9972
809
- },
810
- {
811
- "model_a": "InfEdit",
812
- "model_b": "CosXLEdit",
813
- "winner": "model_a",
814
- "judge": "arena_user_10.16.25.191",
815
- "anony": true,
816
- "tstamp": 1714715956.3416
817
- },
818
- {
819
- "model_a": "Pix2PixZero",
820
- "model_b": "Prompt2prompt",
821
- "winner": "tie (bothbad)",
822
- "judge": "arena_user_10.16.2.201",
823
- "anony": false,
824
- "tstamp": 1714759928.3804
825
- },
826
- {
827
- "model_a": "PNP",
828
- "model_b": "InstructPix2Pix",
829
- "winner": "model_a",
830
- "judge": "arena_user_10.16.17.217",
831
- "anony": true,
832
- "tstamp": 1715246275.0118
833
- },
834
- {
835
- "model_a": "SDEdit",
836
- "model_b": "CosXLEdit",
837
- "winner": "tie (bothbad)",
838
- "judge": "arena_user_10.16.15.199",
839
- "anony": true,
840
- "tstamp": 1715247590.2235
841
- },
842
- {
843
- "model_a": "CycleDiffusion",
844
- "model_b": "CosXLEdit",
845
- "winner": "model_b",
846
- "judge": "arena_user_10.16.41.118",
847
- "anony": false,
848
- "tstamp": 1715406266.2562
849
- },
850
- {
851
- "model_a": "CycleDiffusion",
852
- "model_b": "CosXLEdit",
853
- "winner": "model_a",
854
- "judge": "arena_user_10.16.41.118",
855
- "anony": false,
856
- "tstamp": 1715406354.5284
857
- },
858
- {
859
- "model_a": "CycleDiffusion",
860
- "model_b": "CosXLEdit",
861
- "winner": "model_b",
862
- "judge": "arena_user_10.16.2.201",
863
- "anony": false,
864
- "tstamp": 1715406371.8227
865
- },
866
- {
867
- "model_a": "CycleDiffusion",
868
- "model_b": "CosXLEdit",
869
- "winner": "model_b",
870
- "judge": "arena_user_10.16.41.118",
871
- "anony": false,
872
- "tstamp": 1715406418.5066
873
- },
874
- {
875
- "model_a": "CycleDiffusion",
876
- "model_b": "CosXLEdit",
877
- "winner": "model_b",
878
- "judge": "arena_user_10.16.25.191",
879
- "anony": false,
880
- "tstamp": 1715406449.9401
881
- },
882
- {
883
- "model_a": "CycleDiffusion",
884
- "model_b": "CosXLEdit",
885
- "winner": "model_b",
886
- "judge": "arena_user_10.16.41.118",
887
- "anony": false,
888
- "tstamp": 1715406466.5778
889
- },
890
- {
891
- "model_a": "InfEdit",
892
- "model_b": "CycleDiffusion",
893
- "winner": "model_a",
894
- "judge": "arena_user_10.16.2.201",
895
- "anony": true,
896
- "tstamp": 1715620708.6361
897
- },
898
- {
899
- "model_a": "Prompt2prompt",
900
- "model_b": "CosXLEdit",
901
- "winner": "model_a",
902
- "judge": "arena_user_10.16.41.118",
903
- "anony": false,
904
- "tstamp": 1715621013.5373
905
- },
906
- {
907
- "model_a": "MagicBrush",
908
- "model_b": "CycleDiffusion",
909
- "winner": "tie (bothbad)",
910
- "judge": "arena_user_10.16.2.201",
911
- "anony": true,
912
- "tstamp": 1715661224.0507
913
- },
914
- {
915
- "model_a": "SDEdit",
916
- "model_b": "PNP",
917
- "winner": "tie (bothbad)",
918
- "judge": "arena_user_10.16.41.118",
919
- "anony": true,
920
- "tstamp": 1715661259.6143
921
- },
922
- {
923
- "model_a": "Pix2PixZero",
924
- "model_b": "Prompt2prompt",
925
- "winner": "tie (bothbad)",
926
- "judge": "arena_user_10.16.41.118",
927
- "anony": true,
928
- "tstamp": 1715661288.6018
929
- },
930
- {
931
- "model_a": "InstructPix2Pix",
932
- "model_b": "Prompt2prompt",
933
- "winner": "model_b",
934
- "judge": "arena_user_10.16.25.191",
935
- "anony": true,
936
- "tstamp": 1715661310.3621
937
- },
938
- {
939
- "model_a": "CosXLEdit",
940
- "model_b": "InstructPix2Pix",
941
- "winner": "tie (bothbad)",
942
- "judge": "arena_user_10.16.25.191",
943
- "anony": true,
944
- "tstamp": 1715718742.1258
945
- },
946
- {
947
- "model_a": "MagicBrush",
948
- "model_b": "PNP",
949
- "winner": "model_a",
950
- "judge": "arena_user_10.16.2.201",
951
- "anony": true,
952
- "tstamp": 1715718773.1054
953
- },
954
- {
955
- "model_a": "SDEdit",
956
- "model_b": "CosXLEdit",
957
- "winner": "tie (bothbad)",
958
- "judge": "arena_user_10.16.2.201",
959
- "anony": true,
960
- "tstamp": 1715718785.2832
961
- },
962
- {
963
- "model_a": "InstructPix2Pix",
964
- "model_b": "SDEdit",
965
- "winner": "tie (bothbad)",
966
- "judge": "arena_user_10.16.2.201",
967
- "anony": true,
968
- "tstamp": 1715718804.143
969
- },
970
- {
971
- "model_a": "InfEdit",
972
- "model_b": "CosXLEdit",
973
- "winner": "model_b",
974
- "judge": "arena_user_10.16.25.191",
975
- "anony": true,
976
- "tstamp": 1715718826.0248
977
- },
978
- {
979
- "model_a": "InfEdit",
980
- "model_b": "Prompt2prompt",
981
- "winner": "model_a",
982
- "judge": "arena_user_10.16.2.201",
983
- "anony": true,
984
- "tstamp": 1715718869.0041
985
- },
986
- {
987
- "model_a": "InfEdit",
988
- "model_b": "CosXLEdit",
989
- "winner": "model_b",
990
- "judge": "arena_user_10.16.2.201",
991
- "anony": true,
992
- "tstamp": 1715718904.9307
993
- },
994
- {
995
- "model_a": "Prompt2prompt",
996
- "model_b": "Pix2PixZero",
997
- "winner": "model_a",
998
- "judge": "arena_user_10.16.25.191",
999
- "anony": true,
1000
- "tstamp": 1715718933.1272
1001
- },
1002
- {
1003
- "model_a": "Pix2PixZero",
1004
- "model_b": "MagicBrush",
1005
- "winner": "tie (bothbad)",
1006
- "judge": "arena_user_10.16.2.201",
1007
- "anony": true,
1008
- "tstamp": 1715718954.8497
1009
- },
1010
- {
1011
- "model_a": "MagicBrush",
1012
- "model_b": "PNP",
1013
- "winner": "model_b",
1014
- "judge": "arena_user_10.16.25.191",
1015
- "anony": true,
1016
- "tstamp": 1715718966.8633
1017
- },
1018
- {
1019
- "model_a": "CycleDiffusion",
1020
- "model_b": "Prompt2prompt",
1021
- "winner": "tie (bothbad)",
1022
- "judge": "arena_user_10.16.25.191",
1023
- "anony": true,
1024
- "tstamp": 1715719000.6673
1025
- },
1026
- {
1027
- "model_a": "MagicBrush",
1028
- "model_b": "Pix2PixZero",
1029
- "winner": "tie (bothbad)",
1030
- "judge": "arena_user_10.16.25.191",
1031
- "anony": true,
1032
- "tstamp": 1715719019.5495
1033
- },
1034
- {
1035
- "model_a": "InfEdit",
1036
- "model_b": "Prompt2prompt",
1037
- "winner": "model_a",
1038
- "judge": "arena_user_10.16.25.191",
1039
- "anony": true,
1040
- "tstamp": 1715719035.903
1041
- },
1042
- {
1043
- "model_a": "MagicBrush",
1044
- "model_b": "Pix2PixZero",
1045
- "winner": "model_a",
1046
- "judge": "arena_user_10.16.25.191",
1047
- "anony": true,
1048
- "tstamp": 1715719046.925
1049
- },
1050
- {
1051
- "model_a": "CycleDiffusion",
1052
- "model_b": "CosXLEdit",
1053
- "winner": "tie (bothbad)",
1054
- "judge": "arena_user_10.16.2.201",
1055
- "anony": true,
1056
- "tstamp": 1715719059.6291
1057
- },
1058
- {
1059
- "model_a": "Prompt2prompt",
1060
- "model_b": "SDEdit",
1061
- "winner": "tie (bothbad)",
1062
- "judge": "arena_user_10.16.15.199",
1063
- "anony": true,
1064
- "tstamp": 1715719076.6727
1065
- },
1066
- {
1067
- "model_a": "MagicBrush",
1068
- "model_b": "PNP",
1069
- "winner": "model_a",
1070
- "judge": "arena_user_10.16.25.191",
1071
- "anony": true,
1072
- "tstamp": 1715719086.7836
1073
- },
1074
- {
1075
- "model_a": "CycleDiffusion",
1076
- "model_b": "MagicBrush",
1077
- "winner": "model_b",
1078
- "judge": "arena_user_10.16.25.191",
1079
- "anony": true,
1080
- "tstamp": 1715719109.8071
1081
- },
1082
- {
1083
- "model_a": "Prompt2prompt",
1084
- "model_b": "InstructPix2Pix",
1085
- "winner": "model_b",
1086
- "judge": "arena_user_10.16.25.191",
1087
- "anony": true,
1088
- "tstamp": 1715719122.8237
1089
- },
1090
- {
1091
- "model_a": "MagicBrush",
1092
- "model_b": "SDEdit",
1093
- "winner": "model_a",
1094
- "judge": "arena_user_10.16.15.199",
1095
- "anony": true,
1096
- "tstamp": 1715719134.1345
1097
- },
1098
- {
1099
- "model_a": "SDEdit",
1100
- "model_b": "CycleDiffusion",
1101
- "winner": "tie (bothbad)",
1102
- "judge": "arena_user_10.16.17.217",
1103
- "anony": true,
1104
- "tstamp": 1715719153.4359
1105
- },
1106
- {
1107
- "model_a": "Pix2PixZero",
1108
- "model_b": "MagicBrush",
1109
- "winner": "tie (bothbad)",
1110
- "judge": "arena_user_10.16.17.217",
1111
- "anony": true,
1112
- "tstamp": 1715719160.5285
1113
- },
1114
- {
1115
- "model_a": "MagicBrush",
1116
- "model_b": "InstructPix2Pix",
1117
- "winner": "model_b",
1118
- "judge": "arena_user_10.16.15.199",
1119
- "anony": true,
1120
- "tstamp": 1715719171.4473
1121
- },
1122
- {
1123
- "model_a": "InstructPix2Pix",
1124
- "model_b": "SDEdit",
1125
- "winner": "tie (bothbad)",
1126
- "judge": "arena_user_10.16.2.201",
1127
- "anony": true,
1128
- "tstamp": 1715719184.6227
1129
- },
1130
- {
1131
- "model_a": "CosXLEdit",
1132
- "model_b": "MagicBrush",
1133
- "winner": "model_a",
1134
- "judge": "arena_user_10.16.2.201",
1135
- "anony": true,
1136
- "tstamp": 1715719210.0429
1137
- },
1138
- {
1139
- "model_a": "CycleDiffusion",
1140
- "model_b": "MagicBrush",
1141
- "winner": "model_b",
1142
- "judge": "arena_user_10.16.41.118",
1143
- "anony": true,
1144
- "tstamp": 1715719219.6447
1145
- },
1146
- {
1147
- "model_a": "PNP",
1148
- "model_b": "Pix2PixZero",
1149
- "winner": "tie (bothbad)",
1150
- "judge": "arena_user_10.16.41.118",
1151
- "anony": true,
1152
- "tstamp": 1715719237.7036
1153
- },
1154
- {
1155
- "model_a": "PNP",
1156
- "model_b": "CycleDiffusion",
1157
- "winner": "tie (bothbad)",
1158
- "judge": "arena_user_10.16.25.191",
1159
- "anony": true,
1160
- "tstamp": 1715719249.4321
1161
- },
1162
- {
1163
- "model_a": "Prompt2prompt",
1164
- "model_b": "Pix2PixZero",
1165
- "winner": "model_a",
1166
- "judge": "arena_user_10.16.2.201",
1167
- "anony": true,
1168
- "tstamp": 1715719257.5877
1169
- },
1170
- {
1171
- "model_a": "CosXLEdit",
1172
- "model_b": "Pix2PixZero",
1173
- "winner": "tie (bothbad)",
1174
- "judge": "arena_user_10.16.25.191",
1175
- "anony": true,
1176
- "tstamp": 1715719273.7637
1177
- },
1178
- {
1179
- "model_a": "PNP",
1180
- "model_b": "CosXLEdit",
1181
- "winner": "model_b",
1182
- "judge": "arena_user_10.16.17.217",
1183
- "anony": true,
1184
- "tstamp": 1715719288.4629
1185
- },
1186
- {
1187
- "model_a": "Pix2PixZero",
1188
- "model_b": "PNP",
1189
- "winner": "model_b",
1190
- "judge": "arena_user_10.16.41.118",
1191
- "anony": true,
1192
- "tstamp": 1715719299.1712
1193
- },
1194
- {
1195
- "model_a": "PNP",
1196
- "model_b": "MagicBrush",
1197
- "winner": "model_b",
1198
- "judge": "arena_user_10.16.2.201",
1199
- "anony": true,
1200
- "tstamp": 1715719306.5928
1201
- },
1202
- {
1203
- "model_a": "InstructPix2Pix",
1204
- "model_b": "PNP",
1205
- "winner": "tie (bothbad)",
1206
- "judge": "arena_user_10.16.15.199",
1207
- "anony": true,
1208
- "tstamp": 1715719356.0694
1209
- },
1210
- {
1211
- "model_a": "Prompt2prompt",
1212
- "model_b": "CosXLEdit",
1213
- "winner": "model_a",
1214
- "judge": "arena_user_10.16.25.191",
1215
- "anony": true,
1216
- "tstamp": 1715719368.0491
1217
- },
1218
- {
1219
- "model_a": "Prompt2prompt",
1220
- "model_b": "CycleDiffusion",
1221
- "winner": "tie (bothbad)",
1222
- "judge": "arena_user_10.16.41.118",
1223
- "anony": true,
1224
- "tstamp": 1715719379.185
1225
- },
1226
- {
1227
- "model_a": "CycleDiffusion",
1228
- "model_b": "Prompt2prompt",
1229
- "winner": "tie (bothbad)",
1230
- "judge": "arena_user_10.16.2.201",
1231
- "anony": true,
1232
- "tstamp": 1715719389.0771
1233
- },
1234
- {
1235
- "model_a": "Pix2PixZero",
1236
- "model_b": "MagicBrush",
1237
- "winner": "model_b",
1238
- "judge": "arena_user_10.16.17.217",
1239
- "anony": true,
1240
- "tstamp": 1715719397.7162
1241
- },
1242
- {
1243
- "model_a": "PNP",
1244
- "model_b": "InstructPix2Pix",
1245
- "winner": "tie (bothbad)",
1246
- "judge": "arena_user_10.16.2.201",
1247
- "anony": true,
1248
- "tstamp": 1715719406.4165
1249
- },
1250
- {
1251
- "model_a": "Pix2PixZero",
1252
- "model_b": "PNP",
1253
- "winner": "model_b",
1254
- "judge": "arena_user_10.16.25.191",
1255
- "anony": true,
1256
- "tstamp": 1715719429.1002
1257
- },
1258
- {
1259
- "model_a": "CosXLEdit",
1260
- "model_b": "MagicBrush",
1261
- "winner": "model_a",
1262
- "judge": "arena_user_10.16.2.201",
1263
- "anony": true,
1264
- "tstamp": 1715719435.4694
1265
- },
1266
- {
1267
- "model_a": "PNP",
1268
- "model_b": "SDEdit",
1269
- "winner": "tie (bothbad)",
1270
- "judge": "arena_user_10.16.2.201",
1271
- "anony": true,
1272
- "tstamp": 1715719454.4526
1273
- },
1274
- {
1275
- "model_a": "InfEdit",
1276
- "model_b": "PNP",
1277
- "winner": "tie (bothbad)",
1278
- "judge": "arena_user_10.16.17.217",
1279
- "anony": true,
1280
- "tstamp": 1715719470.154
1281
- },
1282
- {
1283
- "model_a": "MagicBrush",
1284
- "model_b": "PNP",
1285
- "winner": "model_a",
1286
- "judge": "arena_user_10.16.41.118",
1287
- "anony": true,
1288
- "tstamp": 1715719482.3114
1289
- },
1290
- {
1291
- "model_a": "SDEdit",
1292
- "model_b": "PNP",
1293
- "winner": "tie",
1294
- "judge": "arena_user_10.16.2.201",
1295
- "anony": true,
1296
- "tstamp": 1715719499.9643
1297
- },
1298
- {
1299
- "model_a": "InstructPix2Pix",
1300
- "model_b": "MagicBrush",
1301
- "winner": "model_b",
1302
- "judge": "arena_user_10.16.2.201",
1303
- "anony": true,
1304
- "tstamp": 1715719513.7317
1305
- },
1306
- {
1307
- "model_a": "InfEdit",
1308
- "model_b": "PNP",
1309
- "winner": "model_a",
1310
- "judge": "arena_user_10.16.15.199",
1311
- "anony": true,
1312
- "tstamp": 1715719527.69
1313
- },
1314
- {
1315
- "model_a": "Prompt2prompt",
1316
- "model_b": "MagicBrush",
1317
- "winner": "tie (bothbad)",
1318
- "judge": "arena_user_10.16.41.118",
1319
- "anony": true,
1320
- "tstamp": 1715719542.751
1321
- },
1322
- {
1323
- "model_a": "Pix2PixZero",
1324
- "model_b": "InfEdit",
1325
- "winner": "tie (bothbad)",
1326
- "judge": "arena_user_10.16.15.199",
1327
- "anony": true,
1328
- "tstamp": 1715719560.9912
1329
- },
1330
- {
1331
- "model_a": "PNP",
1332
- "model_b": "Pix2PixZero",
1333
- "winner": "tie (bothbad)",
1334
- "judge": "arena_user_10.16.2.201",
1335
- "anony": true,
1336
- "tstamp": 1715719575.3291
1337
- },
1338
- {
1339
- "model_a": "PNP",
1340
- "model_b": "CosXLEdit",
1341
- "winner": "model_b",
1342
- "judge": "arena_user_10.16.17.217",
1343
- "anony": true,
1344
- "tstamp": 1715719581.9552
1345
- },
1346
- {
1347
- "model_a": "Pix2PixZero",
1348
- "model_b": "Prompt2prompt",
1349
- "winner": "tie (bothbad)",
1350
- "judge": "arena_user_10.16.25.191",
1351
- "anony": true,
1352
- "tstamp": 1715719591.9907
1353
- },
1354
- {
1355
- "model_a": "CosXLEdit",
1356
- "model_b": "SDEdit",
1357
- "winner": "model_a",
1358
- "judge": "arena_user_10.16.2.201",
1359
- "anony": true,
1360
- "tstamp": 1715719601.8819
1361
- },
1362
- {
1363
- "model_a": "InfEdit",
1364
- "model_b": "MagicBrush",
1365
- "winner": "model_b",
1366
- "judge": "arena_user_10.16.41.118",
1367
- "anony": true,
1368
- "tstamp": 1715719612.1837
1369
- },
1370
- {
1371
- "model_a": "SDEdit",
1372
- "model_b": "InstructPix2Pix",
1373
- "winner": "tie (bothbad)",
1374
- "judge": "arena_user_10.16.2.201",
1375
- "anony": true,
1376
- "tstamp": 1715719620.469
1377
- },
1378
- {
1379
- "model_a": "InstructPix2Pix",
1380
- "model_b": "MagicBrush",
1381
- "winner": "tie (bothbad)",
1382
- "judge": "arena_user_10.16.41.118",
1383
- "anony": true,
1384
- "tstamp": 1715719627.34
1385
- },
1386
- {
1387
- "model_a": "MagicBrush",
1388
- "model_b": "Prompt2prompt",
1389
- "winner": "model_a",
1390
- "judge": "arena_user_10.16.2.201",
1391
- "anony": true,
1392
- "tstamp": 1715719632.694
1393
- },
1394
- {
1395
- "model_a": "Prompt2prompt",
1396
- "model_b": "SDEdit",
1397
- "winner": "tie (bothbad)",
1398
- "judge": "arena_user_10.16.41.118",
1399
- "anony": true,
1400
- "tstamp": 1715719652.2038
1401
- },
1402
- {
1403
- "model_a": "SDEdit",
1404
- "model_b": "Prompt2prompt",
1405
- "winner": "tie (bothbad)",
1406
- "judge": "arena_user_10.16.25.191",
1407
- "anony": true,
1408
- "tstamp": 1715719661.8855
1409
- },
1410
- {
1411
- "model_a": "CosXLEdit",
1412
- "model_b": "Prompt2prompt",
1413
- "winner": "tie (bothbad)",
1414
- "judge": "arena_user_10.16.2.201",
1415
- "anony": true,
1416
- "tstamp": 1715719677.2949
1417
- },
1418
- {
1419
- "model_a": "MagicBrush",
1420
- "model_b": "Prompt2prompt",
1421
- "winner": "model_a",
1422
- "judge": "arena_user_10.16.2.201",
1423
- "anony": true,
1424
- "tstamp": 1715719687.3022
1425
- },
1426
- {
1427
- "model_a": "SDEdit",
1428
- "model_b": "Prompt2prompt",
1429
- "winner": "model_b",
1430
- "judge": "arena_user_10.16.2.201",
1431
- "anony": true,
1432
- "tstamp": 1715719699.47
1433
- },
1434
- {
1435
- "model_a": "Pix2PixZero",
1436
- "model_b": "InfEdit",
1437
- "winner": "model_b",
1438
- "judge": "arena_user_10.16.2.201",
1439
- "anony": true,
1440
- "tstamp": 1715719706.2375
1441
- },
1442
- {
1443
- "model_a": "CosXLEdit",
1444
- "model_b": "Prompt2prompt",
1445
- "winner": "model_a",
1446
- "judge": "arena_user_10.16.17.217",
1447
- "anony": true,
1448
- "tstamp": 1715719717.3564
1449
- },
1450
- {
1451
- "model_a": "InstructPix2Pix",
1452
- "model_b": "CosXLEdit",
1453
- "winner": "tie (bothbad)",
1454
- "judge": "arena_user_10.16.25.191",
1455
- "anony": true,
1456
- "tstamp": 1715719722.5542
1457
- },
1458
- {
1459
- "model_a": "InfEdit",
1460
- "model_b": "InstructPix2Pix",
1461
- "winner": "tie (bothbad)",
1462
- "judge": "arena_user_10.16.41.118",
1463
- "anony": true,
1464
- "tstamp": 1715719728.5417
1465
- },
1466
- {
1467
- "model_a": "MagicBrush",
1468
- "model_b": "SDEdit",
1469
- "winner": "model_a",
1470
- "judge": "arena_user_10.16.2.201",
1471
- "anony": true,
1472
- "tstamp": 1715719737.2385
1473
- },
1474
- {
1475
- "model_a": "MagicBrush",
1476
- "model_b": "Pix2PixZero",
1477
- "winner": "tie (bothbad)",
1478
- "judge": "arena_user_10.16.15.199",
1479
- "anony": true,
1480
- "tstamp": 1715815138.5243
1481
- },
1482
- {
1483
- "model_a": "CosXLEdit",
1484
- "model_b": "Prompt2prompt",
1485
- "winner": "model_b",
1486
- "judge": "arena_user_10.16.17.217",
1487
- "anony": true,
1488
- "tstamp": 1715815152.0033
1489
- },
1490
- {
1491
- "model_a": "Pix2PixZero",
1492
- "model_b": "Prompt2prompt",
1493
- "winner": "tie (bothbad)",
1494
- "judge": "arena_user_10.16.41.118",
1495
- "anony": true,
1496
- "tstamp": 1715815169.0475
1497
- },
1498
- {
1499
- "model_a": "InstructPix2Pix",
1500
- "model_b": "SDEdit",
1501
- "winner": "model_b",
1502
- "judge": "arena_user_10.16.41.118",
1503
- "anony": true,
1504
- "tstamp": 1715815187.1917
1505
- },
1506
- {
1507
- "model_a": "InstructPix2Pix",
1508
- "model_b": "Pix2PixZero",
1509
- "winner": "tie (bothbad)",
1510
- "judge": "arena_user_10.16.2.201",
1511
- "anony": true,
1512
- "tstamp": 1715815197.5233
1513
- },
1514
- {
1515
- "model_a": "Pix2PixZero",
1516
- "model_b": "SDEdit",
1517
- "winner": "tie (bothbad)",
1518
- "judge": "arena_user_10.16.2.201",
1519
- "anony": true,
1520
- "tstamp": 1715815209.8285
1521
- },
1522
- {
1523
- "model_a": "CycleDiffusion",
1524
- "model_b": "MagicBrush",
1525
- "winner": "model_b",
1526
- "judge": "arena_user_10.16.2.201",
1527
- "anony": true,
1528
- "tstamp": 1715815228.6736
1529
- },
1530
- {
1531
- "model_a": "InfEdit",
1532
- "model_b": "Pix2PixZero",
1533
- "winner": "tie (bothbad)",
1534
- "judge": "arena_user_10.16.2.201",
1535
- "anony": true,
1536
- "tstamp": 1715815236.3935
1537
- },
1538
- {
1539
- "model_a": "SDEdit",
1540
- "model_b": "PNP",
1541
- "winner": "tie (bothbad)",
1542
- "judge": "arena_user_10.16.25.191",
1543
- "anony": true,
1544
- "tstamp": 1715815265.9705
1545
- },
1546
- {
1547
- "model_a": "MagicBrush",
1548
- "model_b": "SDEdit",
1549
- "winner": "tie (bothbad)",
1550
- "judge": "arena_user_10.16.15.199",
1551
- "anony": true,
1552
- "tstamp": 1715815278.5019
1553
- },
1554
- {
1555
- "model_a": "CycleDiffusion",
1556
- "model_b": "CosXLEdit",
1557
- "winner": "tie (bothbad)",
1558
- "judge": "arena_user_10.16.15.199",
1559
- "anony": true,
1560
- "tstamp": 1715815294.5978
1561
- },
1562
- {
1563
- "model_a": "MagicBrush",
1564
- "model_b": "InfEdit",
1565
- "winner": "model_a",
1566
- "judge": "arena_user_10.16.17.217",
1567
- "anony": true,
1568
- "tstamp": 1715815325.4468
1569
- },
1570
- {
1571
- "model_a": "MagicBrush",
1572
- "model_b": "Pix2PixZero",
1573
- "winner": "model_a",
1574
- "judge": "arena_user_10.16.41.118",
1575
- "anony": true,
1576
- "tstamp": 1715913098.6617
1577
- }
1578
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/results/20240516/elo_results_image_editing.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:837f11fd6cda1fe2d6a5cc1c239a207725ad0157b16282303cb684427ddc7e9d
3
- size 62484
 
 
 
 
arena_elo/results/20240516/image_editing_leaderboard.csv DELETED
@@ -1,10 +0,0 @@
1
- key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
2
- CosXLEdit,CosXLEdit,1097.63559213644,1085.7285800995926,cosxl-nc-community,Stability AI,https://huggingface.co/spaces/multimodalart/cosxl
3
- MagicBrush,MagicBrush,1075.1489922450316,1086.8819832924794,CC-BY-4.0,"The Ohio State University, University of Waterloo",https://osu-nlp-group.github.io/MagicBrush
4
- InfEdit,InfEdit,1065.4719519196174,1090.684638162955,Apache-2.0,"University of Michigan, University of California, Berkeley",https://huggingface.co/spaces/sled-umich/InfEdit
5
- Prompt2prompt,Prompt2prompt,1063.1432047252297,1060.8146250689238,Apache-2.0,"Google, Tel Aviv University",https://prompt-to-prompt.github.io
6
- InstructPix2Pix,InstructPix2Pix,1043.9312648233226,1028.7932718869638,"Copyright 2023 Timothy Brooks, Aleksander Holynski, Alexei A. Efros","University of California, Berkeley",https://www.timothybrooks.com/instruct-pix2pix
7
- PNP,PNP,1022.4342554377677,1043.322342347598,-,Weizmann Institute of Science,https://github.com/MichalGeyer/plug-and-play
8
- Pix2PixZero,Pix2PixZero,891.2979039265506,886.7359371585381,MIT License,"Carnegie Mellon University, Adobe Research",https://pix2pixzero.github.io
9
- SDEdit,SDEdit,890.443823405714,880.5508125882768,MIT License,Stanford University,https://sde-image-editing.github.io
10
- CycleDiffusion,CycleDiffusion,850.4930113803264,836.4878093946726,X11,Carnegie Mellon University,https://github.com/ChenWu98/cycle-diffusion
 
 
 
 
 
 
 
 
 
 
 
arena_elo/results/20240517/clean_battle_t2i_generation.json DELETED
The diff for this file is too large to render. See raw diff
 
arena_elo/results/20240517/elo_results_t2i_generation.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:93808a9ce2f497109d0fc708e4055b6463a692502ef541ff28352f52b612916d
3
- size 68172
 
 
 
 
arena_elo/results/20240517/t2i_generation_leaderboard.csv DELETED
@@ -1,12 +0,0 @@
1
- key,Model,Arena Elo rating (anony),Arena Elo rating (full),License,Organization,Link
2
- PlayGround V2.5,PlayGround V2.5,1136.9514432133128,1081.5838551712898,Playground v2.5 Community License,Playground,https://huggingface.co/playgroundai/playground-v2.5-1024px-aesthetic
3
- PlayGround V2,PlayGround V2,1099.4286233187172,1042.590911846903,Playground v2 Community License,Playground,https://huggingface.co/playgroundai/playground-v2-1024px-aesthetic
4
- SDXLLightning,SDXLLightning,1062.4565867132737,1004.4096880141087,openrail++,ByteDance,https://huggingface.co/ByteDance/SDXL-Lightning
5
- StableCascade,StableCascade,1061.93020315328,1006.1117357811837,stable-cascade-nc-community (other),Stability AI,https://huggingface.co/stabilityai/stable-cascade
6
- PixArtAlpha,PixArtAlpha,1051.847602698194,981.1247821885942,openrail++,PixArt-alpha,https://huggingface.co/PixArt-alpha/PixArt-XL-2-1024-MS
7
- PixArtSigma,PixArtSigma,1049.8339911951734,989.7640320919886,openrail++,PixArt-alpha,https://fal.ai/models/fal-ai/pixart-sigma
8
- SDXL,SDXL,999.6167439144875,941.9623909945509,openrail++,Stability AI,https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
9
- SDXLTurbo,SDXLTurbo,933.468824554199,875.8124778188443,sai-nc-community (other),Stability AI,https://huggingface.co/stabilityai/sdxl-turbo
10
- LCM(v1.5/XL),LCM(v1.5/XL),929.425577747465,865.7356218313212,openrail++,Latent Consistency,https://fal.ai/models/fal-ai/fast-lcm-diffusion/api
11
- OpenJourney,OpenJourney,857.2709081764949,793.4952273226107,creativeml-openrail-m,PromptHero,https://huggingface.co/prompthero/openjourney
12
- LCM,LCM,817.7694953154022,773.4948395309905,MIT License,Tsinghua University,https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7
 
 
 
 
 
 
 
 
 
 
 
 
 
arena_elo/results/20240525/clean_battle_image_editing.json DELETED
The diff for this file is too large to render. See raw diff
 
arena_elo/results/20240525/clean_battle_t2i_generation.json DELETED
The diff for this file is too large to render. See raw diff
 
arena_elo/results/20240525/elo_results_image_editing.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a90694074e1b68a62bd75cdf0c81eb545dfcc115da34e9efdb215d668bd13196
3
- size 62502
 
 
 
 
arena_elo/results/20240525/elo_results_t2i_generation.pkl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7172486b8454e25f9b3a9df84e55d2dcce923a3b63e091fd8d165b63bbde7bc4
3
- size 68170