# Streamlit Space: Dyn-VQA Leaderboard
import streamlit as st
import pandas as pd

# Inject custom CSS to adjust the page layout (width, margins, font sizes,
# and table styling). unsafe_allow_html is required for raw <style> tags.
st.markdown(
    """
    <style>
    /* 控制内容的宽度和居中 */
    .reportview-container {
        max-width: 800px; /* 控制最大宽度 */
        margin-left: auto; /* 居中 */
        margin-right: auto;
    }
    /* 新增：减少页面两边的空白 */
    .streamlit-container {
        padding: 0px 10px; /* 左右边距 */
    }
    /* 控制TXT标签内的字体大小 */
    h2, h3, h4, h5, h6 {
        font-size: 16px; /* 适当减小字体大小 */
    }
    /* 控制表格样式 */
    .dataframe {
        width: 100% !important; /* 使表格宽度100% */
        border: none; /* 去掉表格边框 */
    }
    </style>
    """,
    unsafe_allow_html=True,
)
# Set the page title.
st.title("🏆 Dyn-VQA Leaderboard")
# Use a container to reduce surrounding whitespace.
with st.container():
    # Dataset description with links to the paper and the GitHub repository.
    st.subheader("📑 Dataset Description")
    st.markdown('🌟 Dataset for [*Benchmarking Multimodal Retrieval Augmented Generation with Dynamic VQA Dataset and Self-adaptive Planning Agent*](https://arxiv.org/abs/2411.02937).')
    st.markdown('🌟 This dataset is linked to GitHub at [this URL](https://github.com/Alibaba-NLP/OmniSearch)')

    # Leaderboard data: one entry per model/method combination, with scores
    # on the Chinese (zh) and English (en) Dyn-VQA splits and their average.
    # NOTE(review): score metric is not stated here — presumably the paper's
    # evaluation metric; confirm against the linked paper.
    data = {
        "Model": [
            "Omnisearch(gpt-4o)", "gpt-4o Two-Step mRAG", "gpt-4o Original LLMs",
            "qwen-vl-max Two-Step mRAG", "qwen25-vl-7b Two-Step mRAG",
            "gpt-4o Retrieving Images with Input Images", "deepseek-vl-7b-chat Two-Step mRAG",
            "qwen-vl-max Original LLMs", "deepseek-vl2 Two-Step mRAG",
            "qwen-vl-max Retrieving Images with Input Images", "qwen25-vl-7b Retrieving Images with Input Images",
            "qwen25-vl-7b Original LLMs", "deepseek-vl-7b-chat Retrieving Images with Input Images",
            "deepseek-vl2 Retrieving Images with Input Images", "deepseek-vl2 Original LLMs",
            "deepseek-vl-7b-chat Original LLMs"
        ],
        "zh_Dynvqa": [
            54.23, 52.78, 46.54, 50.75, 46.27,
            40.84, 39.48, 32.84, 28.36, 25.37,
            21.98, 18.86, 13.03, 9.91, 9.50,
            8.68
        ],
        "en_Dynvqa": [
            47.17, 45.03, 42.66, 37.76, 35.24,
            40.42, 28.11, 32.87, 26.01, 25.17,
            21.26, 19.71, 10.77, 12.73, 12.87,
            8.67
        ],
        "average": [
            50.7, 48.905, 44.6, 44.255, 40.755,
            40.63, 33.795, 32.855, 27.185, 25.27,
            21.62, 19.285, 11.9, 11.32, 11.185,
            8.675
        ]
    }

    # Convert the raw columns into a DataFrame for tabular display.
    df = pd.DataFrame(data)

    # Render the leaderboard table.
    st.subheader("🕹️ Experiment Leaderboard")
    st.dataframe(df)

    # Show one sample record to illustrate the dataset's JSON schema.
    st.subheader("Data Format")
    st.json({
        "image_url": "https://www.pcarmarket.com/static/media/uploads/galleries/photos/uploads/galleries/22387-pasewark-1986-porsche-944/.thumbnails/IMG_7102.JPG.jpg",
        "question": "What is the model of car from this brand?",
        "question_id": "qid",
        "answer": ["保时捷 944", "Porsche 944."]
    })

    # Update notice.
    st.markdown("🔥 The Dyn-VQA **will be updated regularly.** Latest version: 202502.")

    # Citation information (BibTeX).
    st.subheader("📝 Citation")
    st.code("""
@article{li2024benchmarkingmultimodalretrievalaugmented,
      title={Benchmarking Multimodal Retrieval Augmented Generation with Dynamic VQA Dataset and Self-adaptive Planning Agent},
      author={Yangning Li and Yinghui Li and Xinyu Wang and Yong Jiang and Zhen Zhang and Xinran Zheng and Hui Wang and Hai-Tao Zheng and Pengjun Xie and Philip S. Yu and Fei Huang and Jingren Zhou},
      year={2024},
      eprint={2411.02937},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
      url={https://arxiv.org/abs/2411.02937},
}
    """)
    st.write("When citing our work, please kindly consider citing the original papers. The relevant citation information is listed here.")