Spaces:

Pranjal2041
/

GEO-bench

Sleeping

App Files Files Community

GEO-bench / app.py

Pranjal2041

Potential Fix

dde4acb 9 months ago

raw

history blame contribute delete

No virus

11.3 kB

	import gradio as gr
	import pandas as pd
	import os
	import itertools
	from constants import metric_dict, tags, columns

	# Download from github and load the data

	# TODO: Download every x hours
	def download_data(url = "https://github.com/Pranjal2041/GEO/GEO-Bench/leaderboard/leaderboard.jsonl", path = "leaderboard.jsonl"):
	    """Fetch the leaderboard file with ``wget`` and move it into place.

	    The download goes to ``<path>_tmp`` first and is only renamed onto
	    ``path`` when ``wget`` succeeds, so a failed download never clobbers an
	    existing leaderboard file.

	    Args:
	        url: Location to download from.
	        path: Destination file name.

	    Returns:
	        0 on success, a non-zero integer on any failure (``wget`` missing,
	        ``wget`` exiting non-zero, or the final rename failing).
	    """
	    # Local import so this block does not depend on a new file-level import.
	    import subprocess

	    # NOTE(review): the default URL is a github.com page path, not a raw
	    # file URL -- confirm it actually serves the JSONL before enabling.
	    tmp_path = f'{path}_tmp'
	    try:
	        # Argument list instead of a shell string: url/path are never
	        # interpreted by a shell, so odd characters cannot inject commands.
	        ret_code = subprocess.run(['wget', url, '-O', tmp_path]).returncode
	    except OSError:
	        # wget is not installed or not executable; mirror the shell's
	        # "command not found" status.
	        return 127
	    if ret_code != 0:
	        return ret_code
	    try:
	        os.replace(tmp_path, path)
	    except OSError:
	        return 1
	    return 0

	def search_leaderboard(df, queries):
	    """Return the rows of ``df`` whose ``Method`` contains any of ``queries``.

	    Matching is a case-insensitive *literal* substring test, so characters
	    such as ``+``, ``(`` or ``*`` typed by a user are matched as-is instead
	    of being parsed as a regular expression. Rows with a missing ``Method``
	    never match.

	    Args:
	        df: DataFrame with a string ``Method`` column.
	        queries: Iterable of search strings.

	    Returns:
	        A DataFrame of matching rows, grouped in query order with duplicate
	        rows removed. An empty ``queries`` yields an empty frame with the
	        same columns as ``df``.
	    """
	    matches = [
	        df[df["Method"].str.contains(query, case=False, na=False, regex=False)]
	        for query in queries
	    ]
	    if not matches:
	        # pd.concat raises on an empty list; return a zero-row frame instead.
	        return df.iloc[0:0]
	    return pd.concat(matches).drop_duplicates()

	def search_tags_leaderboard(df, tag_blocks, queries):
	    """Filter ``df`` by the selected tag blocks, then search the result by Method name."""
	    tag_filtered = filter_tags(df, tag_blocks)
	    return search_leaderboard(tag_filtered, queries)

	def filter_tags(df, tag_blocks):
	    """Keep the rows of ``df`` matching every tag block, then aggregate them.

	    A row passes a block when the block contains the choice ``'any'``
	    (case-insensitive), or when one of the row's tags, lower-cased, is a
	    substring of one of the block's selected choices. A row is kept only if
	    it passes all blocks.

	    Args:
	        df: DataFrame with a ``tags`` column holding an iterable of strings
	            per row.
	        tag_blocks: One iterable of selected choices per filter group.

	    Returns:
	        The per-Method aggregate produced by ``prepare_complete_dt`` for the
	        surviving rows.
	    """
	    selected_sets = [{choice.lower() for choice in tag_block} for tag_block in tag_blocks]

	    def row_passes(row_tags):
	        # 'any' is checked first so a block set to "any" never inspects the
	        # row's tags at all.
	        return all(
	            'any' in selected
	            or any(row_tag.lower() in choice for row_tag in row_tags for choice in selected)
	            for selected in selected_sets
	        )

	    # Read tags from the frame that was passed in (not a module global) so
	    # the positions line up with df.iloc below.
	    kept_positions = [
	        position for position, row_tags in enumerate(df['tags']) if row_passes(row_tags)
	    ]

	    return prepare_complete_dt(df.iloc[kept_positions])

	def prepare_complete_dt(complete_dt):
	    """Aggregate per-query results into one leaderboard row per Method.

	    Each output row holds the Method name, the mean of every column named
	    by the values of ``metric_dict`` (in ``metric_dict`` order), and the
	    ``source`` of the Method's first row. The frame is built with the
	    module-level ``columns`` as header (presumably Method, the metric
	    names, then source -- confirm against ``constants``).

	    Args:
	        complete_dt: DataFrame with ``Method``, ``source`` and the raw
	            metric columns.

	    Returns:
	        The aggregated DataFrame, sorted by ``'WordPos Overall'`` descending
	        when that column exists. Methods otherwise keep the order in which
	        they first appear in ``complete_dt``.
	    """
	    rows = []
	    # groupby(sort=False) visits Methods in first-appearance order (stable
	    # across runs, unlike iterating a set) and skips rows without a Method.
	    for method, method_rows in complete_dt.groupby('Method', sort=False):
	        row = [method]
	        row.extend(method_rows[raw_column].mean() for raw_column in metric_dict.values())
	        row.append(method_rows['source'].iloc[0])
	        rows.append(row)
	    DATA_OVERALL = pd.DataFrame(rows, columns=columns)
	    try:
	        DATA_OVERALL.sort_values(by=['WordPos Overall'], inplace=True, ascending=False)
	    except KeyError:
	        # No 'WordPos Overall' column configured: leave the rows unsorted.
	        pass
	    return DATA_OVERALL

	def format_df_for_leaderboard(df):
	    """Return a display-ready copy of a leaderboard frame.

	    The ``Method`` column becomes an HTML link pointing at the row's
	    ``source``, every column named by a key of ``metric_dict`` is scaled by
	    100 and rounded to one decimal, and the ``source`` column is dropped.

	    The input frame is left untouched, so calling this repeatedly on the
	    same frame never nests links or alters the data that searches run on.

	    Args:
	        df: DataFrame with ``Method``, ``source`` and the metric columns.

	    Returns:
	        A new DataFrame without the ``source`` column.
	    """
	    df_copy = df.copy()
	    # Built with zip rather than DataFrame.apply(axis=1): works on an empty
	    # frame and avoids positional indexing into a label-indexed row.
	    df_copy['Method'] = [
	        f'<a target="_blank" style="text-decoration: underline; color: #3571d7;" href="{source}">{method}</a>'
	        for source, method in zip(df['source'], df['Method'])
	    ]
	    # Show metrics as percentages with one decimal.
	    for metric in metric_dict:
	        df_copy[metric] = df_copy[metric].apply(lambda x: float(f'{(100*x):.1f}'))
	    return df_copy.drop(columns=['source'])


	# The live download is switched off for now, so the status stays 0 and the
	# leaderboard is read from the local file.
	ret_code = 0
	# ret_code = download_data()
	if ret_code:
	    print("Leaderboard Download failed")

	# Raw per-row results, and the per-Method aggregate shown in the tables.
	complete_dt = pd.read_json('leaderboard.jsonl', orient='records', lines=True)
	DATA_OVERALL = prepare_complete_dt(complete_dt)


	with gr.Blocks() as demo:

	    # Static page header: badge styling, title, project links and a short blurb.
	    demo_content = """
	<style>
	.badge-container {
	text-align: center;
	display: flex;
	justify-content: center;
	}
	.badge {
	margin: 1px;
	}
	</style>
	<h1 style="text-align: center;">GEO-Bench Leaderboard</h1>
	<div class="badge-container">
	<a href="https://pranjal2041.github.io/GEO/" class="badge">
	<img src="https://img.shields.io/website?down_message=down&style=for-the-badge&up_message=up&url=https%3A%2F%2Fpranjal2041.github.io/GEO/" alt="Website">
	</a>
	<a href="https://arxiv.org/abs/2311.09735" class="badge">
	<img src="https://img.shields.io/badge/arXiv-2311.09735-red.svg?style=for-the-badge" alt="Arxiv Paper">
	</a>
	<a href="https://huggingface.co/datasets/GEO-optim/geo-bench" class="badge">
	<img src="https://img.shields.io/badge/Dataset-GEO-%2DBENCH-orange?style=for-the-badge" alt="Dataset">
	</a>
	<a href="https://github.com/Pranjal2041/GEO" class="badge">
	<img src="https://img.shields.io/badge/Github-Code-green?style=for-the-badge" alt="Code">
	</a>
	</div>
	<p>
	- For benchmarking content optimization Methods for Generative Engines.<br>
	- GEO-Bench evaluates Methods for optimizing website content to improve visibility in generative engine responses. Benchmark contains 10K queries across 9 datasets covering diverse domains and intents.<br>
	- Refer to GEO paper for more <a href="https://arxiv.org/abs/2311.09735">details</a>
	</p>
	"""

	    gr.HTML(demo_content)

	    # Both tables start out showing the same formatted overall leaderboard,
	    # so format it once and reuse the result.
	    initial_table = format_df_for_leaderboard(DATA_OVERALL)
	    # Column types of the displayed table: the linked Method column is
	    # markdown, every remaining (metric) column is numeric.
	    table_datatypes = ["markdown"] + ["number"] * (len(initial_table.columns) - 1)
	    tag_names = list(tags.keys())

	    def search_button_click(query):
	        """Search the overall leaderboard for comma-separated Method names."""
	        filtered_data = search_leaderboard(DATA_OVERALL, [x.strip() for x in query.split(',')])
	        return format_df_for_leaderboard(filtered_data)

	    def filter_tag_click(*selected):
	        """Recompute the tag-wise table from the current checkbox selections."""
	        return format_df_for_leaderboard(filter_tags(complete_dt, list(selected)))

	    def search_tag_click(query, *selected):
	        """Apply the tag filters, then search the result by Method name."""
	        return format_df_for_leaderboard(
	            search_tags_leaderboard(complete_dt, list(selected), [x.strip() for x in query.split(',')])
	        )

	    def make_tag_filter(tag):
	        """Create the boxed checkbox group for one tag category, all choices selected."""
	        with gr.Box(elem_id="box-filter"):
	            return gr.CheckboxGroup(
	                label=tag,
	                choices=tags[tag],
	                value=tags[tag],
	                interactive=True,
	                elem_id=f"filter-{tag}",
	            )

	    with gr.Tabs():

	        with gr.TabItem('Overall 📊'):

	            with gr.Row():
	                gr.Markdown('## Overall Leaderboard')

	            with gr.Row():
	                data_overall = gr.components.Dataframe(
	                    initial_table,
	                    datatype=table_datatypes,
	                    type="pandas",
	                    wrap=True,
	                    interactive=False,
	                )

	            with gr.Row():
	                search_bar = gr.Textbox(
	                    placeholder=" 🔍 Search for your Method (separate multiple queries with `,`) and press ENTER...",
	                    show_label=False,
	                    elem_id="search-bar",
	                )

	        with gr.TabItem('Tag-Wise Results 📊'):
	            with gr.Row():
	                gr.Markdown("""
	## Tag-Wise Results
	- The following table shows the results for each tag.
	- The tags are sorted in the order of their performance.
	- The table is sorted in the order of the overall score.
	""")
	            with gr.Row():
	                search_bar_tag = gr.Textbox(
	                    placeholder=" 🔍 Search for your Method (separate multiple queries with `,`) and press ENTER...",
	                    show_label=False,
	                    elem_id="search-bar",
	                )

	            # Filter layout: the first three tag categories in the left
	            # column, the fifth onwards in the right column, and the fourth
	            # in a row of its own underneath.
	            boxes = dict()
	            with gr.Row():
	                with gr.Column(min_width=320):
	                    for tag in tag_names[:3]:
	                        boxes[tag] = make_tag_filter(tag)
	                with gr.Column(min_width=320):
	                    for tag in tag_names[4:]:
	                        boxes[tag] = make_tag_filter(tag)
	            with gr.Row():
	                tag = tag_names[3]
	                boxes[tag] = make_tag_filter(tag)

	            with gr.Row():
	                data_tag_wise = gr.components.Dataframe(
	                    initial_table,
	                    datatype=table_datatypes,
	                    type="pandas",
	                    wrap=True,
	                    interactive=False,
	                )

	            # Any checkbox change re-filters using the state of all groups;
	            # submitting the tag search box additionally applies the query.
	            tag_inputs = list(boxes.values())
	            for checkbox_group in tag_inputs:
	                checkbox_group.change(fn=filter_tag_click, inputs=tag_inputs, outputs=data_tag_wise)
	            search_bar_tag.submit(fn=search_tag_click, inputs=[search_bar_tag] + tag_inputs, outputs=data_tag_wise)

	        with gr.TabItem('About GEO-bench 📖'):
	            with gr.Row():
	                # Links carry an explicit https:// scheme; without it the
	                # browser treats them as paths relative to the Space.
	                gr.Markdown("""
	## About GEO-bench
	- GEO-bench is a benchmarking platform for content optimization Methods for generative engines.
	- It is a part of the work released under [GEO](https://arxiv.org/abs/2311.09735)
	- The benchmark comprises of 9 datasets, 7 of which were publicly available, while 2 have been released by us.
	- Dataset can be downloaded from [here](https://huggingface.co/datasets/GEO-optim/geo-bench)""")

	            with gr.Row():
	                gr.HTML("""
	<h3>Key-Highlights of GEO-bench</h3>
	<ul>
	<li>Goal of benchmarking content optimization for generative engines</li>
	<li>Contains 10K carefully curated queries</li>
	<li>Queries are diverse and cover many domains/intents</li>
	<li>Annotated with tags/dimensions like domain, difficulty, etc.</li>
	</ul>
	""")

	            # TODO: add info about tags and other benchmark statistics.

	        with gr.TabItem('Submit 📝'):
	            with gr.Row():
	                # NOTE(review): only the missing scheme was added here;
	                # confirm the repository path itself resolves.
	                gr.Markdown("""
	## Submit
	- To submit your Method, please check [here](https://github.com/Pranjal2041/GEO/GEO-Bench/leaderboard/Readme.md)""")

	            # TODO: create a submission form whose response is sent to a google form.

	    search_bar.submit(fn=search_button_click, inputs=search_bar, outputs=data_overall)

	if __name__ == "__main__":
	    demo.launch(share=True)