import streamlit as st
import pandas as pd
import json
from note import SUBMISSION
from st_aggrid import JsCode
from st_aggrid import AgGrid, GridOptionsBuilder
def load_data():
    """Load the leaderboard records from ``data.jsonl`` into a DataFrame.

    The file is opened relative to the current working directory and decoded
    as UTF-8. Every non-blank line is parsed with ``json.loads`` and becomes
    one row of the result. Blank or whitespace-only lines (for example an
    extra empty line at the end of the file) are skipped rather than being
    handed to the JSON parser, which would reject them.

    Returns:
        pandas.DataFrame: One row per parsed line.

    Raises:
        FileNotFoundError: If ``data.jsonl`` does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open("data.jsonl", "r", encoding="utf-8") as file:
        records = [json.loads(line) for line in file if line.strip()]
    return pd.DataFrame(records)
def case_insensitive_search(data, query, column):
    """Return the rows of ``data`` whose ``column`` contains ``query``, ignoring case.

    The query is matched as a literal substring (not as a regular
    expression), so characters such as ``(``, ``+`` or ``[`` in user input
    are searched for verbatim instead of raising ``re.error``. Cell values
    are converted to text before matching, which lets non-string columns be
    searched; missing values never match.

    Args:
        data: DataFrame to filter.
        query: Text to look for. A falsy query (``""`` or ``None``) disables
            filtering.
        column: Name of the column to search.

    Returns:
        ``data`` itself when ``query`` is falsy, otherwise the subset of rows
        whose ``column`` value contains ``query`` case-insensitively.

    Raises:
        KeyError: If ``column`` is not a column of ``data``.
    """
    if not query:
        return data
    values = data[column]
    needle = query.lower()
    # ``notna`` is checked on the raw values because ``astype(str)`` can turn
    # a missing value into the literal text "nan"/"None", which must not match.
    matches = values.astype(str).str.lower().str.contains(needle, regex=False, na=False)
    return data[values.notna() & matches]
def colored_note(text, background_color='#fcfced', text_color='black'):
    """Render ``text`` through ``st.markdown`` inside a colored box.

    ``background_color`` and ``text_color`` were previously accepted but never
    used; they are now applied as inline CSS on a wrapping ``<div>``.

    Args:
        text: Content of the note. It is interpolated unescaped and rendered
            with ``unsafe_allow_html=True``, so it must come from a trusted
            source.
        background_color: CSS color used as the box background.
        text_color: CSS color used for the text.
    """
    # The blank lines around ``text`` end the opening HTML block so that the
    # note body can still be interpreted as Markdown.
    # NOTE(review): relies on Streamlit's Markdown renderer re-nesting the
    # content inside the <div>; confirm visually on the deployed version.
    st.markdown(
        f'<div style="background-color: {background_color}; color: {text_color}; '
        f'padding: 10px; border-radius: 5px;">\n\n{text}\n\n</div>',
        unsafe_allow_html=True,
    )
# AG Grid cell renderer (JavaScript) for cells whose value may be an HTML
# anchor stored as text.
#
# - If the value is a string containing `href="` and parses to a document
#   with an <a> element, the cell shows a real link that reuses the anchor's
#   href and text, has no underline, and opens in a new tab.
# - Otherwise the cell shows the value as plain text in a <span>; null and
#   undefined render as an empty cell.
#
# Links carry rel="noopener noreferrer" so the opened page gets no handle on
# this window through `window.opener`. Non-string values are never passed to
# `.includes`, which only exists on strings and arrays.
#
# NOTE(review): st_aggrid's JsCode appears to post-process this source text;
# keep JavaScript comments out of the string unless that is confirmed safe.
html_render = JsCode(
    """
class UrlCellRenderer {
  init(params) {
    const value = params.value;
    this.eGui = document.createElement('span');
    this.eGui.innerText = value == null ? '' : value;
    if (typeof value !== 'string' || !value.includes('href=\\"')) {
      return;
    }
    const parsedHtml = new DOMParser().parseFromString(value, 'text/html');
    const link = parsedHtml.querySelector('a');
    if (!link) {
      return;
    }
    const anchor = document.createElement('a');
    anchor.setAttribute('href', link.getAttribute('href'));
    anchor.setAttribute('style', "text-decoration:none");
    anchor.setAttribute('target', "_blank");
    anchor.setAttribute('rel', "noopener noreferrer");
    anchor.innerText = link.innerText;
    this.eGui = anchor;
  }
  getGui() {
    return this.eGui;
  }
}
"""
)
def display_table(data, rows_per_page=12):
    """Render ``data`` in an AG Grid next to the submission note.

    The page area is split into two columns with a 5:3 width ratio: the grid
    on the left and ``SUBMISSION`` (via ``colored_note``) on the right. The
    model and framework columns use the ``html_render`` cell renderer, and
    the grid is initially sorted by ``Accuracy`` in descending order.

    Args:
        data: DataFrame to display.
        rows_per_page: Number of rows used to size the grid. The height in
            pixels is ``40 + rows_per_page * 38``, capped at 800.
    """
    link_columns = ['Chat Model', 'Embedding Model', 'Reranker Model', 'Framework']

    # Emits a newline-only Markdown element.
    # NOTE(review): presumably a placeholder for custom CSS/HTML - confirm.
    st.markdown("\n", unsafe_allow_html=True)
    container = st.container()

    gb = GridOptionsBuilder.from_dataframe(data)
    # ``filter`` is the AG Grid column-definition property that enables
    # filtering; the previous ``filterable`` key is not a column property.
    gb.configure_columns(
        link_columns,
        cellRenderer=html_render,
        sortable=True,
        filter=True,
        resizable=True,
    )
    gb.configure_column("Accuracy", sort='desc')
    grid_options = gb.build()

    height = min(40 + rows_per_page * 38, 800)
    with container:
        table_col, note_col = st.columns([5, 3])
        with table_col:
            st.markdown("\n", unsafe_allow_html=True)
            # allow_unsafe_jscode is required for the JsCode cell renderer.
            AgGrid(data, height=height, gridOptions=grid_options, allow_unsafe_jscode=True)
        with note_col:
            colored_note(SUBMISSION)
def main():
    """Build the Multihop-RAG leaderboard page.

    Loads the results, shows four text filters (chat model, embedding model,
    chunk size, framework) plus a search button, and renders the filtered
    table followed by the citation.

    Filters are applied on every script run in which the corresponding input
    is non-empty. Previously they were applied only during the single rerun
    triggered by clicking the button, so any other rerun silently dropped the
    search. The button is kept as an explicit way to submit the inputs.
    """
    st.set_page_config(layout="wide")
    st.title("Multihop-RAG 💡")
    st.write("Displaying results across different frameworks, embedding models, chat models, and chunks.")
    data = load_data()
    st.markdown("\n", unsafe_allow_html=True)

    col1, col2, col3, col4, col5 = st.columns(5)
    with col1:
        chat_model_query = st.text_input("Chat Model", key="chat_model_query")
    with col2:
        embedding_model_query = st.text_input("Embedding Model", key="embedding_model_query")
    with col3:
        chunk_query = st.text_input("Chunk", key="chunk_query")
    with col4:
        frame_query = st.text_input("Framework", key="frame_query")
    with col5:
        # Clicking reruns the script; the return value is not needed because
        # filtering no longer depends on the click itself.
        st.button("🔍 Search")

    # (query text, DataFrame column) pairs; empty queries leave data unchanged.
    filters = (
        (chat_model_query, 'Chat Model'),
        (embedding_model_query, 'Embedding Model'),
        (chunk_query, 'Chunk Size'),
        (frame_query, 'Framework'),
    )
    for query, column in filters:
        data = case_insensitive_search(data, query, column)

    st.info("Retrieval Stage: MRR@10 and Hit@10; Response Stage: Accuracy ")
    display_table(data)
    st.markdown("---")
    st.caption("For citation, please use: 'Tang, Yixuan, and Yi Yang. MultiHop-RAG: Benchmarking Retrieval-Augmented Generation for Multi-Hop Queries. ArXiv, 2024, https://arxiv.org/abs/2401.15391. '")
# st.markdown("---")
# st.caption("For results self-reporting, please send an email to ytangch@connect.ust.hk")
# Build the page only when this file is executed as the main module, so that
# importing it does not render anything.
if __name__ == "__main__":
    main()