# Streamlit Space: Dyn-VQA Leaderboard
import streamlit as st
import pandas as pd

# Inject custom CSS to adjust the page layout (width, margins, font sizes,
# and table styling). unsafe_allow_html is required for raw <style> tags.
st.markdown(
    """
    <style>
    /* 控制内容的宽度和居中 */
    .reportview-container {
        max-width: 800px; /* 控制最大宽度 */
        margin-left: auto; /* 居中 */
        margin-right: auto;
    }
    /* 新增：减少页面两边的空白 */
    .streamlit-container {
        padding: 0px 10px; /* 左右边距 */
    }
    /* 控制TXT标签内的字体大小 */
    h2, h3, h4, h5, h6 {
        font-size: 16px; /* 适当减小字体大小 */
    }
    /* 控制表格样式 */
    .dataframe {
        width: 100% !important; /* 使表格宽度100% */
        border: none; /* 去掉表格边框 */
    }
    </style>
    """,
    unsafe_allow_html=True,
)
# Set the page title.
st.title("🏆 Dyn-VQA Leaderboard")
# Use a container to reduce surrounding whitespace.
with st.container():
    # Dataset description with links to the paper and the GitHub repository.
    st.subheader("📑 Dataset Description")
    st.markdown('🌟 Dataset for [*Benchmarking Multimodal Retrieval Augmented Generation with Dynamic VQA Dataset and Self-adaptive Planning Agent*](https://arxiv.org/abs/2411.02937).')
    st.markdown('🌟 This dataset is linked to GitHub at [this URL](https://github.com/Alibaba-NLP/OmniSearch)')

    # Leaderboard data: one entry per model/method combination, with scores
    # on the Chinese (zh) and English (en) Dyn-VQA splits and their average.
    # NOTE(review): score metric is not stated here — presumably the paper's
    # evaluation metric; confirm against the linked paper.
    data = {
        "Model": [
            "Omnisearch(gpt-4o)", "gpt-4o Two-Step mRAG", "gpt-4o Original LLMs",
            "qwen-vl-max Two-Step mRAG", "qwen25-vl-7b Two-Step mRAG",
            "gpt-4o Retrieving Images with Input Images", "deepseek-vl-7b-chat Two-Step mRAG",
            "qwen-vl-max Original LLMs", "deepseek-vl2 Two-Step mRAG",
            "qwen-vl-max Retrieving Images with Input Images", "qwen25-vl-7b Retrieving Images with Input Images",
            "qwen25-vl-7b Original LLMs", "deepseek-vl-7b-chat Retrieving Images with Input Images",
            "deepseek-vl2 Retrieving Images with Input Images", "deepseek-vl2 Original LLMs",
            "deepseek-vl-7b-chat Original LLMs"
        ],
        "zh_Dynvqa": [
            54.23, 52.78, 46.54, 50.75, 46.27,
            40.84, 39.48, 32.84, 28.36, 25.37,
            21.98, 18.86, 13.03, 9.91, 9.50,
            8.68
        ],
        "en_Dynvqa": [
            47.17, 45.03, 42.66, 37.76, 35.24,
            40.42, 28.11, 32.87, 26.01, 25.17,
            21.26, 19.71, 10.77, 12.73, 12.87,
            8.67
        ],
        "average": [
            50.7, 48.905, 44.6, 44.255, 40.755,
            40.63, 33.795, 32.855, 27.185, 25.27,
            21.62, 19.285, 11.9, 11.32, 11.185,
            8.675
        ]
    }

    # Convert the raw columns into a DataFrame for tabular display.
    df = pd.DataFrame(data)

    # Render the leaderboard table.
    st.subheader("🕹️ Experiment Leaderboard")
    st.dataframe(df)

    # Show one sample record to illustrate the dataset's JSON schema.
    st.subheader("Data Format")
    st.json({
        "image_url": "https://www.pcarmarket.com/static/media/uploads/galleries/photos/uploads/galleries/22387-pasewark-1986-porsche-944/.thumbnails/IMG_7102.JPG.jpg",
        "question": "What is the model of car from this brand?",
        "question_id": "qid",
        "answer": ["保时捷 944", "Porsche 944."]
    })

    # Update notice.
    st.markdown("🔥 The Dyn-VQA **will be updated regularly.** Latest version: 202502.")

    # Citation information (BibTeX).
    st.subheader("📝 Citation")
    st.code("""
@article{li2024benchmarkingmultimodalretrievalaugmented,
      title={Benchmarking Multimodal Retrieval Augmented Generation with Dynamic VQA Dataset and Self-adaptive Planning Agent},
      author={Yangning Li and Yinghui Li and Xinyu Wang and Yong Jiang and Zhen Zhang and Xinran Zheng and Hui Wang and Hai-Tao Zheng and Pengjun Xie and Philip S. Yu and Fei Huang and Jingren Zhou},
      year={2024},
      eprint={2411.02937},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
      url={https://arxiv.org/abs/2411.02937},
}
    """)
    st.write("When citing our work, please kindly consider citing the original papers. The relevant citation information is listed here.")