Pranjal2041 committed on
Commit
4dfff5c
•
1 Parent(s): 257090a

Update Website

Files changed (4)
  1. .gitattributes +35 -35
  2. README.md +13 -13
  3. app.py +254 -254
  4. constants.py +31 -31
.gitattributes CHANGED
@@ -1,35 +1,35 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,13 +1,13 @@
- ---
- title: GEO Bench
- emoji: 📚
- colorFrom: purple
- colorTo: indigo
- sdk: gradio
- sdk_version: 3.50.2
- app_file: app.py
- pinned: false
- license: apache-2.0
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
+ ---
+ title: GEO Bench
+ emoji: 📚
+ colorFrom: purple
+ colorTo: indigo
+ sdk: gradio
+ sdk_version: 3.50.2
+ app_file: app.py
+ pinned: false
+ license: apache-2.0
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,255 +1,255 @@
- import gradio as gr
- import pandas as pd
- import os
- import itertools
- from constants import metric_dict, tags, columns
-
- # Download from github and load the data
-
- # TODO: Download every x hours
- def download_data(url = "https://github.com/Pranjal2041/GEO/GEO-Bench/leaderboard/leaderboard.jsonl", path = "leaderboard.jsonl"):
-     ret_code = os.system(f'wget {url} -O {path}_tmp')
-     if ret_code != 0:
-         return ret_code
-     os.system(f'mv {path}_tmp {path}')
-     return 0
-
- def search_leaderboard(df, queries):
-     # Assuming DATA_OVERALL is the DataFrame containing the leaderboard data
-     # filtered_data = df[df["Method"].str.contains(query, case=False, na=False)]
-     temp_pds = []
-     for query in queries:
-         temp_pds.append(df[df["Method"].str.contains(query, case=False, na=False)])
-     return pd.concat(temp_pds).drop_duplicates()
-
- def search_tags_leaderboard(df, tag_blocks, queries):
-     return search_leaderboard(filter_tags(df, tag_blocks), queries)
-
- def filter_tags(df, tag_blocks):
-     def fuzzy_in(x, y_set):
-         return any(x in z for z in y_set)
-     all_tags_sets = [set(tag.lower() for tag in tag_block) for tag_block in tag_blocks]
-
-     filtered_rows = [i for i, tags in enumerate(complete_dt['tags']) if all('any' in tag_set or any(fuzzy_in(tag.lower(), tag_set) for tag in tags) for tag_set in all_tags_sets)]
-
-     return prepare_complete_dt(df.iloc[filtered_rows])
-
- def prepare_complete_dt(complete_dt):
-     data = []
-     DATA_OVERALL = complete_dt.copy()
-     for Method in set(complete_dt['Method']):
-         data.append([])
-         data[-1].append(Method)
-         for metric in metric_dict:
-             metric_val = metric_dict[metric]
-             data[-1].append(complete_dt[complete_dt['Method'] == Method][metric_val].mean())
-         data[-1].append(complete_dt[complete_dt['Method'] == Method]['source'].iloc[0])
-     DATA_OVERALL = pd.DataFrame(data, columns=columns)
-     try:
-         DATA_OVERALL.sort_values(by=['WordPos Overall'], inplace=True, ascending=False)
-     except: ...
-     return DATA_OVERALL
-
- def format_df_for_leaderboard(df):
-     # The source column needs to be embedded directly into the Method column using appropriate markdown.
-     df['Method'] = df[['source', 'Method']].apply(lambda x: f'<a target="_blank" style="text-decoration: underline; color: #3571d7;" href="{x[0]}">{x[1]}</a>', axis=1)
-     # Convert all float metrics to 1 decimal
-     df_copy = df.copy()
-     for metric in metric_dict:
-         df_copy[metric] = df_copy[metric].apply(lambda x: float(f'{(100*x):.1f}'))
-     # drop the source column
-     return df_copy.drop(columns=['source'])
-
-
- ret_code = 0
- # ret_code = download_data()
- if ret_code != 0:
-     print("Leaderboard Download failed")
-
- complete_dt = pd.read_json('leaderboard.jsonl', lines=True, orient='records')
- DATA_OVERALL = prepare_complete_dt(complete_dt)
-
-
- with gr.Blocks() as demo:
-
-     demo_content = """
-     <style>
-     .badge-container {
-         text-align: center;
-         display: flex;
-         justify-content: center;
-     }
-     .badge {
-         margin: 1px;
-     }
-     </style>
-     <h1 style="text-align: center;">GEO-Bench Leaderboard</h1>
-     <div class="badge-container">
-         <a href="https://pranjal2041.github.io/geo/" class="badge">
-             <img src="https://img.shields.io/website?down_message=down&style=for-the-badge&up_message=up&url=https%3A%2F%2Fpranjal2041.github.io/geo/" alt="Website">
-         </a>
-         <a href="https://arxiv.org/abs/2310.18xxx" class="badge">
-             <img src="https://img.shields.io/badge/arXiv-2310.18xxx-red.svg?style=for-the-badge" alt="Arxiv Paper">
-         </a>
-         <a href="https://huggingface.co/datasets/Pranjal2041/geo-bench" class="badge">
-             <img src="https://img.shields.io/badge/Dataset-GEO-%2DBENCH-orange?style=for-the-badge" alt="Dataset">
-         </a>
-         <a href="https://github.com/Pranjal2041/GEO" class="badge">
-             <img src="https://img.shields.io/badge/Github-Code-green?style=for-the-badge" alt="Code">
-         </a>
-     </div>
-     <p>
-     - For benchmarking content optimization Methods for Generative Engines.<br>
-     - GEO-Bench evaluates Methods for optimizing website content to improve visibility in generative engine responses. Benchmark contains 10K queries across 9 datasets covering diverse domains and intents.<br>
-     - Refer to GEO paper for more <a href="https://arxiv.org/abs/2310.18xxx">details</a>
-     </p>
-     """
-
-
-     gr.HTML(demo_content)
-
-
-
-
-     with gr.Tabs():
-
-         with gr.TabItem('Overall 📊'):
-
-             with gr.Row():
-                 gr.Markdown('## Overall Leaderboard')
-
-             with gr.Row():
-                 data_overall = gr.components.Dataframe(
-                     format_df_for_leaderboard(DATA_OVERALL),
-                     datatype=["markdown"] + ["number"] * (len(DATA_OVERALL.columns) - 2) + ['markdown'],
-                     type="pandas",
-                     wrap=True,
-                     interactive=False,
-                 )
-             # data_overall.
-
-             with gr.Row():
-                 # search_bar = gr.Textbox(type="text", label="Search for a Method:")
-                 search_bar = gr.Textbox(
-                     placeholder=" 🔍 Search for your Method (separate multiple queries with `,`) and press ENTER...",
-                     show_label=False,
-                     elem_id="search-bar",
-                 )
-
-             def search_button_click(query):
-                 filtered_data = search_leaderboard(DATA_OVERALL, [x.strip() for x in query.split(',')])
-                 return format_df_for_leaderboard(filtered_data)
-
-         with gr.TabItem('Tag-Wise Results 📊'):
-             with gr.Row():
-                 gr.Markdown(f"""
-                 ## Tag-Wise Results
-                 - The following table shows the results for each tag.
-                 - The tags are sorted in the order of their performance.
-                 - The table is sorted in the order of the overall score.
-                 """)
-             with gr.Row():
-
-                 search_bar_tag = gr.Textbox(
-                     placeholder=" 🔍 Search for your Method (separate multiple queries with `,`) and press ENTER...",
-                     show_label=False,
-                     elem_id="search-bar",
-                 )
-
-             def search_button_click(query):
-                 filtered_data = search_leaderboard(DATA_OVERALL, [x.strip() for x in query.split(',')])
-                 return format_df_for_leaderboard(filtered_data)
-
-             with gr.Row():
-                 boxes = dict()
-                 with gr.Column(min_width=320):
-                     for tag in list(tags.keys())[:3]:
-                         with gr.Box(elem_id="box-filter"):
-                             boxes[tag] = gr.CheckboxGroup(
-                                 label=tag,
-                                 choices=tags[tag],
-                                 value=tags[tag],
-                                 interactive=True,
-                                 elem_id=f"filter-{tag}",
-                             )
-                 with gr.Column(min_width=320):
-                     for tag in list(tags.keys())[4:]:
-                         with gr.Box(elem_id="box-filter"):
-                             boxes[tag] = gr.CheckboxGroup(
-                                 label=tag,
-                                 choices=tags[tag],
-                                 value=tags[tag],
-                                 interactive=True,
-                                 elem_id=f"filter-{tag}",
-                             )
-             with gr.Row():
-                 tag = list(tags.keys())[3]
-                 with gr.Box(elem_id="box-filter"):
-                     boxes[tag] = gr.CheckboxGroup(
-                         label=tag,
-                         choices=tags[tag],
-                         value=tags[tag],
-                         interactive=True,
-                         elem_id=f"filter-{tag}",
-                     )
-             with gr.Row():
-                 data_tag_wise = gr.components.Dataframe(
-                     format_df_for_leaderboard(DATA_OVERALL),
-                     datatype=["markdown"] + ["number"] * (len(DATA_OVERALL.columns) - 2) + ['markdown'],
-                     type="pandas",
-                     wrap=True,
-                     interactive=False,
-                 )
-             def filter_tag_click(*boxes):
-                 return format_df_for_leaderboard(filter_tags(complete_dt, list(boxes)))
-             def search_tag_click(query, *boxes):
-                 return format_df_for_leaderboard(search_tags_leaderboard(complete_dt, list(boxes), [x.strip() for x in query.split(',')]))
-             for box in boxes:
-                 boxes[box].change(fn=filter_tag_click, inputs=list(boxes.values()), outputs=data_tag_wise)
-             search_bar_tag.submit(fn=search_tag_click, inputs=[search_bar_tag] + list(boxes.values()), outputs=data_tag_wise)
-
-         with gr.TabItem('About GEO-bench 📖'):
-             with gr.Row():
-                 gr.Markdown(f"""
-                 ## About GEO-bench
-                 - GEO-bench is a benchmarking platform for content optimization Methods for generative engines.
-                 - It is a part of the work released under [GEO](https://arxiv.org/abs/2310.18xxx)
-                 - The benchmark comprises of 9 datasets, 7 of which were publicly available, while 2 have been released by us.
-                 - Dataset can be downloaded from [here](huggingface.co/datasets/pranjal2041/geo-bench)""")
-
-             with gr.Row():
-
-                 # Goal of benchmarking content optimization for generative engines
-                 # Contains 10K carefully curated queries
-                 # Queries are diverse and cover many domains/intents
-                 # Annotated with tags/dimensions like domain, difficulty, etc.
-                 # Above list in HTML format
-                 gr.HTML(f"""
-                 <h3>Key-Highlights of GEO-bench</h3>
-                 <ul>
-                     <li>Goal of benchmarking content optimization for generative engines</li>
-                     <li>Contains 10K carefully curated queries</li>
-                     <li>Queries are diverse and cover many domains/intents</li>
-                     <li>Annotated with tags/dimensions like domain, difficulty, etc.</li>
-                 </ul>
-                 """)
-
-             # Benchmark Link:
-             # gr.Markdown(f"""### Benchmark Link: [GEO-bench](huggingface.co/datasets/pranjal2041/geo-bench)""")
-
-             # Info about tags and other statistics
-
-
-         with gr.TabItem('Submit 📝'):
-             with gr.Row():
-                 gr.Markdown(f"""
-                 ## Submit
-                 - To submit your Method, please check [here](github.com/Pranjal2041/GEO/GEO-Bench/leaderboard/Readme.md)""")
-
-
-     # Create a form to submit, the response should be sent to a google form
-
-     search_bar.submit(fn=search_button_click, inputs=search_bar, outputs=data_overall)
-
- if __name__ == "__main__":
-     demo.launch()
 
+ import gradio as gr
+ import pandas as pd
+ import os
+ import itertools
+ from constants import metric_dict, tags, columns
+
+ # Download from github and load the data
+
+ # TODO: Download every x hours
+ def download_data(url = "https://github.com/Pranjal2041/GEO/GEO-Bench/leaderboard/leaderboard.jsonl", path = "leaderboard.jsonl"):
+     ret_code = os.system(f'wget {url} -O {path}_tmp')
+     if ret_code != 0:
+         return ret_code
+     os.system(f'mv {path}_tmp {path}')
+     return 0
+
+ def search_leaderboard(df, queries):
+     # Assuming DATA_OVERALL is the DataFrame containing the leaderboard data
+     # filtered_data = df[df["Method"].str.contains(query, case=False, na=False)]
+     temp_pds = []
+     for query in queries:
+         temp_pds.append(df[df["Method"].str.contains(query, case=False, na=False)])
+     return pd.concat(temp_pds).drop_duplicates()
+
+ def search_tags_leaderboard(df, tag_blocks, queries):
+     return search_leaderboard(filter_tags(df, tag_blocks), queries)
+
+ def filter_tags(df, tag_blocks):
+     def fuzzy_in(x, y_set):
+         return any(x in z for z in y_set)
+     all_tags_sets = [set(tag.lower() for tag in tag_block) for tag_block in tag_blocks]
+
+     filtered_rows = [i for i, tags in enumerate(complete_dt['tags']) if all('any' in tag_set or any(fuzzy_in(tag.lower(), tag_set) for tag in tags) for tag_set in all_tags_sets)]
+
+     return prepare_complete_dt(df.iloc[filtered_rows])
+
+ def prepare_complete_dt(complete_dt):
+     data = []
+     DATA_OVERALL = complete_dt.copy()
+     for Method in set(complete_dt['Method']):
+         data.append([])
+         data[-1].append(Method)
+         for metric in metric_dict:
+             metric_val = metric_dict[metric]
+             data[-1].append(complete_dt[complete_dt['Method'] == Method][metric_val].mean())
+         data[-1].append(complete_dt[complete_dt['Method'] == Method]['source'].iloc[0])
+     DATA_OVERALL = pd.DataFrame(data, columns=columns)
+     try:
+         DATA_OVERALL.sort_values(by=['WordPos Overall'], inplace=True, ascending=False)
+     except: ...
+     return DATA_OVERALL
+
+ def format_df_for_leaderboard(df):
+     # The source column needs to be embedded directly into the Method column using appropriate markdown.
+     df['Method'] = df[['source', 'Method']].apply(lambda x: f'<a target="_blank" style="text-decoration: underline; color: #3571d7;" href="{x[0]}">{x[1]}</a>', axis=1)
+     # Convert all float metrics to 1 decimal
+     df_copy = df.copy()
+     for metric in metric_dict:
+         df_copy[metric] = df_copy[metric].apply(lambda x: float(f'{(100*x):.1f}'))
+     # drop the source column
+     return df_copy.drop(columns=['source'])
+
+
+ ret_code = 0
+ # ret_code = download_data()
+ if ret_code != 0:
+     print("Leaderboard Download failed")
+
+ complete_dt = pd.read_json('leaderboard.jsonl', lines=True, orient='records')
+ DATA_OVERALL = prepare_complete_dt(complete_dt)
+
+
+ with gr.Blocks() as demo:
+
+     demo_content = """
+     <style>
+     .badge-container {
+         text-align: center;
+         display: flex;
+         justify-content: center;
+     }
+     .badge {
+         margin: 1px;
+     }
+     </style>
+     <h1 style="text-align: center;">GEO-Bench Leaderboard</h1>
+     <div class="badge-container">
+         <a href="https://pranjal2041.github.io/GEO/" class="badge">
+             <img src="https://img.shields.io/website?down_message=down&style=for-the-badge&up_message=up&url=https%3A%2F%2Fpranjal2041.github.io/GEO/" alt="Website">
+         </a>
+         <a href="https://arxiv.org/abs/2310.18xxx" class="badge">
+             <img src="https://img.shields.io/badge/arXiv-2310.18xxx-red.svg?style=for-the-badge" alt="Arxiv Paper">
+         </a>
+         <a href="https://huggingface.co/datasets/Pranjal2041/geo-bench" class="badge">
+             <img src="https://img.shields.io/badge/Dataset-GEO-%2DBENCH-orange?style=for-the-badge" alt="Dataset">
+         </a>
+         <a href="https://github.com/Pranjal2041/GEO" class="badge">
+             <img src="https://img.shields.io/badge/Github-Code-green?style=for-the-badge" alt="Code">
+         </a>
+     </div>
+     <p>
+     - For benchmarking content optimization Methods for Generative Engines.<br>
+     - GEO-Bench evaluates Methods for optimizing website content to improve visibility in generative engine responses. Benchmark contains 10K queries across 9 datasets covering diverse domains and intents.<br>
+     - Refer to GEO paper for more <a href="https://arxiv.org/abs/2310.18xxx">details</a>
+     </p>
+     """
+
+
+     gr.HTML(demo_content)
+
+
+
+
+     with gr.Tabs():
+
+         with gr.TabItem('Overall 📊'):
+
+             with gr.Row():
+                 gr.Markdown('## Overall Leaderboard')
+
+             with gr.Row():
+                 data_overall = gr.components.Dataframe(
+                     format_df_for_leaderboard(DATA_OVERALL),
+                     datatype=["markdown"] + ["number"] * (len(DATA_OVERALL.columns) - 2) + ['markdown'],
+                     type="pandas",
+                     wrap=True,
+                     interactive=False,
+                 )
+             # data_overall.
+
+             with gr.Row():
+                 # search_bar = gr.Textbox(type="text", label="Search for a Method:")
+                 search_bar = gr.Textbox(
+                     placeholder=" 🔍 Search for your Method (separate multiple queries with `,`) and press ENTER...",
+                     show_label=False,
+                     elem_id="search-bar",
+                 )
+
+             def search_button_click(query):
+                 filtered_data = search_leaderboard(DATA_OVERALL, [x.strip() for x in query.split(',')])
+                 return format_df_for_leaderboard(filtered_data)
+
+         with gr.TabItem('Tag-Wise Results 📊'):
+             with gr.Row():
+                 gr.Markdown(f"""
+                 ## Tag-Wise Results
+                 - The following table shows the results for each tag.
+                 - The tags are sorted in the order of their performance.
+                 - The table is sorted in the order of the overall score.
+                 """)
+             with gr.Row():
+
+                 search_bar_tag = gr.Textbox(
+                     placeholder=" 🔍 Search for your Method (separate multiple queries with `,`) and press ENTER...",
+                     show_label=False,
+                     elem_id="search-bar",
+                 )
+
+             def search_button_click(query):
+                 filtered_data = search_leaderboard(DATA_OVERALL, [x.strip() for x in query.split(',')])
+                 return format_df_for_leaderboard(filtered_data)
+
+             with gr.Row():
+                 boxes = dict()
+                 with gr.Column(min_width=320):
+                     for tag in list(tags.keys())[:3]:
+                         with gr.Box(elem_id="box-filter"):
+                             boxes[tag] = gr.CheckboxGroup(
+                                 label=tag,
+                                 choices=tags[tag],
+                                 value=tags[tag],
+                                 interactive=True,
+                                 elem_id=f"filter-{tag}",
+                             )
+                 with gr.Column(min_width=320):
+                     for tag in list(tags.keys())[4:]:
+                         with gr.Box(elem_id="box-filter"):
+                             boxes[tag] = gr.CheckboxGroup(
+                                 label=tag,
+                                 choices=tags[tag],
+                                 value=tags[tag],
+                                 interactive=True,
+                                 elem_id=f"filter-{tag}",
+                             )
+             with gr.Row():
+                 tag = list(tags.keys())[3]
+                 with gr.Box(elem_id="box-filter"):
+                     boxes[tag] = gr.CheckboxGroup(
+                         label=tag,
+                         choices=tags[tag],
+                         value=tags[tag],
+                         interactive=True,
+                         elem_id=f"filter-{tag}",
+                     )
+             with gr.Row():
+                 data_tag_wise = gr.components.Dataframe(
+                     format_df_for_leaderboard(DATA_OVERALL),
+                     datatype=["markdown"] + ["number"] * (len(DATA_OVERALL.columns) - 2) + ['markdown'],
+                     type="pandas",
+                     wrap=True,
+                     interactive=False,
+                 )
+             def filter_tag_click(*boxes):
+                 return format_df_for_leaderboard(filter_tags(complete_dt, list(boxes)))
+             def search_tag_click(query, *boxes):
+                 return format_df_for_leaderboard(search_tags_leaderboard(complete_dt, list(boxes), [x.strip() for x in query.split(',')]))
+             for box in boxes:
+                 boxes[box].change(fn=filter_tag_click, inputs=list(boxes.values()), outputs=data_tag_wise)
+             search_bar_tag.submit(fn=search_tag_click, inputs=[search_bar_tag] + list(boxes.values()), outputs=data_tag_wise)
+
+         with gr.TabItem('About GEO-bench 📖'):
+             with gr.Row():
+                 gr.Markdown(f"""
+                 ## About GEO-bench
+                 - GEO-bench is a benchmarking platform for content optimization Methods for generative engines.
+                 - It is a part of the work released under [GEO](https://arxiv.org/abs/2310.18xxx)
+                 - The benchmark comprises of 9 datasets, 7 of which were publicly available, while 2 have been released by us.
+                 - Dataset can be downloaded from [here](huggingface.co/datasets/pranjal2041/geo-bench)""")
+
+             with gr.Row():
+
+                 # Goal of benchmarking content optimization for generative engines
+                 # Contains 10K carefully curated queries
+                 # Queries are diverse and cover many domains/intents
+                 # Annotated with tags/dimensions like domain, difficulty, etc.
+                 # Above list in HTML format
+                 gr.HTML(f"""
+                 <h3>Key-Highlights of GEO-bench</h3>
+                 <ul>
+                     <li>Goal of benchmarking content optimization for generative engines</li>
+                     <li>Contains 10K carefully curated queries</li>
+                     <li>Queries are diverse and cover many domains/intents</li>
+                     <li>Annotated with tags/dimensions like domain, difficulty, etc.</li>
+                 </ul>
+                 """)
+
+             # Benchmark Link:
+             # gr.Markdown(f"""### Benchmark Link: [GEO-bench](huggingface.co/datasets/pranjal2041/geo-bench)""")
+
+             # Info about tags and other statistics
+
+
+         with gr.TabItem('Submit 📝'):
+             with gr.Row():
+                 gr.Markdown(f"""
+                 ## Submit
+                 - To submit your Method, please check [here](github.com/Pranjal2041/GEO/GEO-Bench/leaderboard/Readme.md)""")
+
+
+     # Create a form to submit, the response should be sent to a google form
+
+     search_bar.submit(fn=search_button_click, inputs=search_bar, outputs=data_overall)
+
+ if __name__ == "__main__":
+     demo.launch()
constants.py CHANGED
@@ -1,31 +1,31 @@
- # metrics = ['relevance_detailed', 'uniqueness_detailed', 'subjcount_detailed', 'follow_detailed', 'simple_wordpos', 'simple_pos', 'influence_detailed', 'subjective_score', 'diversity_detailed', 'simple_word', 'subjpos_detailed']
- columns = ['Method', 'Word', 'Position', 'WordPos Overall', 'Rel.', 'Infl.', 'Unique', 'Div.', 'FollowUp', 'Pos.', 'Count', 'Subjective Average', 'source']
- metric_dict = {
-     'Word': 'simple_word',
-     'Position': 'simple_pos',
-     'WordPos Overall': 'simple_wordpos',
-     'Rel.': 'relevance_detailed',
-     'Infl.': 'influence_detailed',
-     'Unique': 'uniqueness_detailed',
-     'Div.': 'diversity_detailed',
-     'FollowUp': 'follow_detailed',
-     'Pos.': 'subjpos_detailed',
-     'Count': 'subjcount_detailed',
-     'Subjective Average': 'subjective_score',
- }
-
- tags = {
-     "Difficulty Level": ["Simple", "Intermediate", "Complex", "Multi-faceted", "Open-ended", 'any'],
-     "Nature of Query": ["Informational", "Navigational", "Transactional", "Debate", "Opinion", "Comparison", "Instructional", "Descriptive", "Predictive", 'any'],
-     "Sensitivity": ["Sensitive", "Non-sensitive",'any'],
-     "Genre": [
-         "🎭 Arts and Entertainment", "🚗 Autos and Vehicles", "💄 Beauty and Fitness", "📚 Books and Literature", "🏢 Business and Industrial",
-         "💻 Computers and Electronics", "💰 Finance", "🍔 Food and Drink", "🎮 Games", "🏥 Health", "🎨 Hobbies and Leisure", "🏡 Home and Garden",
-         "🌐 Internet and Telecom", "🎓 Jobs and Education", "🏛️ Law and Government", "📰 News", "💬 Online Communities", "👫 People and Society",
-         "🐾 Pets and Animals", "🏡 Real Estate", "📚 Reference", "🔬 Science", "🛒 Shopping", "⚽ Sports", "✈️ Travel",'any'
-     ],
-     "Specific Topics": ["Physics", "Chemistry", "Biology", "Mathematics", "Computer Science", "Economics", 'any'],
-     "User Intent": ["🔍 Research", "💰 Purchase", "🎉 Entertainment", "📚 Learning", "🔄 Comparison", 'any'],
-     "Answer Type": ["Fact", "Opinion", "List", "Explanation", "Guide", "Comparison", "Prediction", 'any'],
- }
-
 
+ # metrics = ['relevance_detailed', 'uniqueness_detailed', 'subjcount_detailed', 'follow_detailed', 'simple_wordpos', 'simple_pos', 'influence_detailed', 'subjective_score', 'diversity_detailed', 'simple_word', 'subjpos_detailed']
+ columns = ['Method', 'Word', 'Position', 'WordPos Overall', 'Rel.', 'Infl.', 'Unique', 'Div.', 'FollowUp', 'Pos.', 'Count', 'Subjective Average', 'source']
+ metric_dict = {
+     'Word': 'simple_word',
+     'Position': 'simple_pos',
+     'WordPos Overall': 'simple_wordpos',
+     'Rel.': 'relevance_detailed',
+     'Infl.': 'influence_detailed',
+     'Unique': 'uniqueness_detailed',
+     'Div.': 'diversity_detailed',
+     'FollowUp': 'follow_detailed',
+     'Pos.': 'subjpos_detailed',
+     'Count': 'subjcount_detailed',
+     'Subjective Average': 'subjective_score',
+ }
+
+ tags = {
+     "Difficulty Level": ["Simple", "Intermediate", "Complex", "Multi-faceted", "Open-ended", 'any'],
+     "Nature of Query": ["Informational", "Navigational", "Transactional", "Debate", "Opinion", "Comparison", "Instructional", "Descriptive", "Predictive", 'any'],
+     "Sensitivity": ["Sensitive", "Non-sensitive",'any'],
+     "Genre": [
+         "🎭 Arts and Entertainment", "🚗 Autos and Vehicles", "💄 Beauty and Fitness", "📚 Books and Literature", "🏢 Business and Industrial",
+         "💻 Computers and Electronics", "💰 Finance", "🍔 Food and Drink", "🎮 Games", "🏥 Health", "🎨 Hobbies and Leisure", "🏡 Home and Garden",
+         "🌐 Internet and Telecom", "🎓 Jobs and Education", "🏛️ Law and Government", "📰 News", "💬 Online Communities", "👫 People and Society",
+         "🐾 Pets and Animals", "🏡 Real Estate", "📚 Reference", "🔬 Science", "🛒 Shopping", "⚽ Sports", "✈️ Travel",'any'
+     ],
+     "Specific Topics": ["Physics", "Chemistry", "Biology", "Mathematics", "Computer Science", "Economics", 'any'],
+     "User Intent": ["🔍 Research", "💰 Purchase", "🎉 Entertainment", "📚 Learning", "🔄 Comparison", 'any'],
+     "Answer Type": ["Fact", "Opinion", "List", "Explanation", "Guide", "Comparison", "Prediction", 'any'],
+ }
+
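
The leaderboard tables rendered by app.py come from `prepare_complete_dt`, which averages every metric in `metric_dict` for each Method across the rows of leaderboard.jsonl and attaches that Method's first `source` link before sorting by `WordPos Overall`. Below is a minimal, self-contained sketch of that aggregation; the trimmed constants and the example records (method names, scores, tags) are made-up placeholders for illustration, not values from the real leaderboard.

```python
import pandas as pd

# Trimmed stand-ins for the constants.py mappings (the full versions appear in the diff above).
metric_dict = {"Word": "simple_word", "Position": "simple_pos", "WordPos Overall": "simple_wordpos"}
columns = ["Method"] + list(metric_dict) + ["source"]

# Hypothetical leaderboard.jsonl-style rows: the field names mirror what app.py reads
# (Method, source, tags, plus one column per metric value); the numbers are invented.
records = pd.DataFrame([
    {"Method": "Method A", "source": "https://github.com/Pranjal2041/GEO", "tags": ["Simple"],
     "simple_word": 0.20, "simple_pos": 0.21, "simple_wordpos": 0.22},
    {"Method": "Method A", "source": "https://github.com/Pranjal2041/GEO", "tags": ["Complex"],
     "simple_word": 0.30, "simple_pos": 0.29, "simple_wordpos": 0.28},
    {"Method": "Method B", "source": "https://github.com/Pranjal2041/GEO", "tags": ["Simple"],
     "simple_word": 0.18, "simple_pos": 0.17, "simple_wordpos": 0.16},
])

# One output row per Method: average each metric column, keep the first source link,
# then sort by the overall score -- the same idea as prepare_complete_dt in app.py.
rows = [
    [method] + [group[col].mean() for col in metric_dict.values()] + [group["source"].iloc[0]]
    for method, group in records.groupby("Method")
]
overall = pd.DataFrame(rows, columns=columns).sort_values("WordPos Overall", ascending=False)
print(overall)
```

`format_df_for_leaderboard` then scales these averages by 100, rounds them to one decimal place, and folds the `source` URL into the Method cell as a link before the table is shown in the Gradio Dataframe.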