yixuantt committed on
Commit
e55642f
•
1 Parent(s): 2546424

first_commit

Browse files
Files changed (5) hide show
  1. __pycache__/note.cpython-310.pyc +0 -0
  2. app.py +108 -28
  3. data.jsonl +6 -3
  4. note.py +19 -0
  5. requirements.txt +1 -0
__pycache__/note.cpython-310.pyc ADDED
Binary file (678 Bytes). View file
 
app.py CHANGED
@@ -1,7 +1,9 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import json
4
-
 
 
5
  def load_data():
6
  with open("data.jsonl", 'r', encoding='utf-8') as file:
7
  data = [json.loads(line) for line in file]
@@ -13,45 +15,123 @@ def case_insensitive_search(data, query, column):
13
  return data[data[column].str.lower().str.contains(query.lower())]
14
  return data
15
 
16
- def display_table(data, rows_per_page=10):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  container = st.container()
 
 
 
 
 
 
18
  with container:
19
- height = min(40 + rows_per_page * 38, 800)
20
- st.dataframe(data, height=height)
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  def main():
23
- st.title("Multihop-RAG Benchmark 💡")
 
 
24
 
25
  data = load_data()
26
 
27
- st.sidebar.header("Search Options")
28
- chat_model_query = st.sidebar.text_input("Chat Model")
29
- embedding_model_query = st.sidebar.text_input("Embedding Model")
30
- chunk_query = st.sidebar.text_input("Chunk")
31
- frame_query = st.sidebar.text_input("Framework")
32
-
33
- if chat_model_query:
34
- data = case_insensitive_search(data, chat_model_query, 'chat_model')
35
- if embedding_model_query:
36
- data = case_insensitive_search(data, embedding_model_query, 'embedding_model')
37
- if chunk_query:
38
- data = case_insensitive_search(data, chunk_query, 'chunk')
39
- if frame_query:
40
- data = case_insensitive_search(data, frame_query, 'framework')
41
 
42
- st.write("Displaying results across different frameworks, embedding models, chat models, and chunks.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  st.info("Retrieval Stage: MRR@10 and Hit@10; Response Stage: Accuracy ")
44
- display_table(data)
45
 
46
- st.sidebar.header("Citation")
47
- st.sidebar.info(
48
- "Please cite this dataset as:\n"
49
- "Tang, Yixuan, and Yi Yang. MultiHop-RAG: Benchmarking Retrieval-Augmented Generation for Multi-Hop Queries. ArXiv, 2024, /abs/2401.15391."
50
- )
51
  st.markdown("---")
52
  st.caption("For citation, please use: 'Tang, Yixuan, and Yi Yang. MultiHop-RAG: Benchmarking Retrieval-Augmented Generation for Multi-Hop Queries. ArXiv, 2024, /abs/2401.15391. '")
53
- st.markdown("---")
54
- st.caption("For results self-reporting, please send an email to ytangch@connect.ust.hk")
55
 
56
  if __name__ == "__main__":
57
  main()
 
1
  import streamlit as st
2
  import pandas as pd
3
  import json
4
+ from note import SUBMISSION
5
+ from st_aggrid import JsCode
6
+ from st_aggrid import AgGrid, GridOptionsBuilder
7
  def load_data():
8
  with open("data.jsonl", 'r', encoding='utf-8') as file:
9
  data = [json.loads(line) for line in file]
 
15
  return data[data[column].str.lower().str.contains(query.lower())]
16
  return data
17
 
18
def colored_note(text, background_color='#fcfced', text_color='black'):
    """Render ``text`` inside a rounded, shadowed box on the Streamlit page.

    The box is emitted as raw HTML through ``st.markdown`` with
    ``unsafe_allow_html=True``; ``text`` is interpolated as-is (no escaping),
    so callers may pass HTML markup but must only pass trusted content.

    Args:
        text: Content placed inside the box (may contain HTML).
        background_color: CSS colour used for the box background.
        text_color: CSS colour used for the box text.
    """
    # The wrapper <div> is explicitly closed so that elements rendered after
    # the note are not swallowed by an unterminated tag.
    st.markdown(f"""
<div style='background-color: {background_color}; color: {text_color};
border-radius: 8px; padding: 10px; margin: 8px 0; box-shadow: 2px 2px 5px grey;'>
{text}
</div>
""", unsafe_allow_html=True)
24
+
25
# Custom AG Grid cell renderer, written in JavaScript and handed to the grid
# through JsCode.
#
# * A string cell containing an HTML anchor (detected by the substring
#   href=") is parsed with DOMParser and shown as a real <a> element that
#   keeps the anchor's href and text and opens in a new tab.
# * Any other value is shown as plain text inside a <span>.
#
# Hardening compared with a naive renderer:
# * the anchor check only runs on string values, so numeric cells cannot
#   raise on a missing String.prototype.includes;
# * missing values (null/undefined) render as an empty cell instead of the
#   literal text "undefined";
# * links opened with target="_blank" also get rel="noopener noreferrer" so
#   the destination page cannot reach back through window.opener.
#
# Explanations are kept in Python comments rather than inside the JavaScript
# source so the string handed to JsCode contains code only.
html_render = JsCode(
    """
    class UrlCellRenderer {
      init(params) {
        const value = params.value;
        const text = value == null ? '' : value;
        this.eGui = document.createElement('span');
        if (typeof value === 'string' && value.includes('href=\\"')) {
          const parser = new DOMParser();
          const parsedHtml = parser.parseFromString(value, 'text/html');
          const link = parsedHtml.querySelector('a');
          if (link) {
            this.eGui = document.createElement('a');
            this.eGui.setAttribute('href', link.getAttribute('href'));
            this.eGui.innerText = link.innerText;
            this.eGui.setAttribute('style', "text-decoration:none");
            this.eGui.setAttribute('target', "_blank");
            this.eGui.setAttribute('rel', "noopener noreferrer");
          } else {
            this.eGui.innerText = text;
          }
        } else {
          this.eGui.innerText = text;
        }
      }
      getGui() {
        return this.eGui;
      }
    }
    """
)
53
def display_table(data, rows_per_page=12):
    """Show ``data`` in an AG Grid next to the submission note.

    The page is split into two columns (width ratio 5:3): the grid goes in
    the left one and ``colored_note(SUBMISSION)`` in the right one. The four
    model/framework columns use the ``html_render`` cell renderer, and the
    grid is initially sorted by "Accuracy" in descending order.

    Args:
        data: Table to display; passed to ``GridOptionsBuilder.from_dataframe``
            and ``AgGrid`` (presumably a pandas DataFrame - confirm with caller).
        rows_per_page: Number of rows used to size the grid; the height is
            ``40 + 38 * rows_per_page`` pixels, capped at 800.
    """
    page_css = """
<style>
.centered {
display: flex;
justify-content: center;
}
.css-1l02zno {
flex: 1;
}
</style>
"""
    grid_css = """
<style>
.ag-theme-balham {
height: 500px;
width: 50%;
margin: auto;
}
</style>
"""
    st.markdown(page_css, unsafe_allow_html=True)

    # Columns whose cells may hold HTML anchors and need the link renderer.
    link_columns = ['Chat Model', 'Embedding Model', 'Reranker Model', 'Framework']
    link_column_options = {
        "cellRenderer": html_render,
        "sortable": True,
        "filterable": True,
        "resizable": True,
    }

    builder = GridOptionsBuilder.from_dataframe(data)
    builder.configure_columns(link_columns, **link_column_options)
    builder.configure_column("Accuracy", sort='desc')
    grid_options = builder.build()

    grid_height = min(40 + rows_per_page * 38, 800)

    with st.container():
        table_col, note_col = st.columns([5, 3])
        with table_col:
            st.markdown(grid_css, unsafe_allow_html=True)
            # allow_unsafe_jscode is required for the JsCode cell renderer.
            AgGrid(
                data,
                height=grid_height,
                gridOptions=grid_options,
                allow_unsafe_jscode=True,
            )
        with note_col:
            colored_note(SUBMISSION)
88
 
89
def main():
    """Build the Streamlit page: title, search row, results table, citation.

    Loads the benchmark rows with ``load_data()``, shows four text inputs and
    a search button in a single row, and - only when the button is pressed -
    narrows the rows with ``case_insensitive_search`` for every non-empty
    input. The (possibly filtered) rows are then rendered by
    ``display_table``.
    """
    st.set_page_config(layout="wide")
    st.title("Multihop-RAG 💡")
    st.write("Displaying results across different frameworks, embedding models, chat models, and chunks.")

    data = load_data()

    # Size the search button and push it down so it lines up with the text
    # inputs that share its row.
    st.markdown("""
<style>
div.stButton > button:first-child {
height: 2em;
width: 100%;
margin-top: 1.8em;
}
</style>
""", unsafe_allow_html=True)
    col1, col2, col3, col4, col5 = st.columns(5)

    with col1:
        chat_model_query = st.text_input("Chat Model", key="chat_model_query")
    with col2:
        embedding_model_query = st.text_input("Embedding Model", key="embedding_model_query")
    with col3:
        chunk_query = st.text_input("Chunk", key="chunk_query")
    with col4:
        frame_query = st.text_input("Framework", key="frame_query")
    with col5:
        search_button = st.button("🔍 Search")

    if search_button:
        # (column to search, text typed by the user); filters are applied in
        # this order and empty inputs are skipped.
        # NOTE(review): "Chunk Size" holds integers in data.jsonl while
        # case_insensitive_search uses the pandas .str accessor - confirm the
        # column is string-typed by the time it gets here.
        filters = (
            ('Chat Model', chat_model_query),
            ('Embedding Model', embedding_model_query),
            ('Chunk Size', chunk_query),
            ('Framework', frame_query),
        )
        for column, query in filters:
            if query:
                data = case_insensitive_search(data, query, column)

    st.info("Retrieval Stage: MRR@10 and Hit@10; Response Stage: Accuracy ")

    display_table(data)
    st.markdown("---")
    st.caption("For citation, please use: 'Tang, Yixuan, and Yi Yang. MultiHop-RAG: Benchmarking Retrieval-Augmented Generation for Multi-Hop Queries. ArXiv, 2024, /abs/2401.15391. '")
    # st.markdown("---")
    # st.caption("For results self-reporting, please send an email to ytangch@connect.ust.hk")
135
 
136
  if __name__ == "__main__":
137
  main()
data.jsonl CHANGED
@@ -1,3 +1,6 @@
1
- {"Framework":"naive_RAG","Chat Model":"GPT-4","Embedding Model":"voyage-02","Reranker Model":"BAAI\/bge-reranker-large","Chunk Size":256,"MRR@10":0.3934,"Hit@10":0.6506,"Accuracy":0.56}
2
- {"Framework":"naive_RAG","Chat Model":"ChatGPT","Embedding Model":"voyage-02","Reranker Model":"BAAI\/bge-reranker-large","Chunk Size":256,"MRR@10":0.3934,"Hit@10":0.6506,"Accuracy":0.44}
3
- {"Framework":"naive_RAG","Chat Model":"Llama-2-70b-chat-hf","Embedding Model":"voyage-02","Reranker Model":"BAAI\/bge-reranker-large","Chunk Size":256,"MRR@10":0.3934,"Hit@10":0.6506,"Accuracy":0.28}
 
 
 
 
1
+ {"Framework":"naive_RAG","Chat Model":"<a href=\"https://openai.com/index/gpt-4/\" target=\"_blank\">GPT-4</a>","Embedding Model":"<a href=\"https://docs.voyageai.com/docs/embeddings\" target=\"_blank\">voyage-02</a>","Reranker Model":"<a href=\"https://huggingface.co/BAAI/bge-reranker-large\" target=\"_blank\">BAAI/bge-reranker-large</a>","Chunk Size":256,"MRR@10":0.3934,"Hit@10":0.6506,"Accuracy":0.56}
2
+ {"Framework":"naive_RAG","Chat Model":"<a href=\"https://openai.com/chatgpt/\" target=\"_blank\">ChatGPT</a>","Embedding Model":"<a href=\"https://docs.voyageai.com/docs/embeddings\" target=\"_blank\">voyage-02</a>","Reranker Model":"<a href=\"https://huggingface.co/BAAI/bge-reranker-large\" target=\"_blank\">BAAI/bge-reranker-large</a>","Chunk Size":256,"MRR@10":0.3934,"Hit@10":0.6506,"Accuracy":0.44}
3
+ {"Framework":"naive_RAG","Chat Model":"<a href=\"https://huggingface.co/meta-llama/Llama-2-70b-chat-hf\" target=\"_blank\">meta-llama/Llama-2-70b-chat-hf</a>","Embedding Model":"<a href=\"https://docs.voyageai.com/docs/embeddings\" target=\"_blank\">voyage-02</a>","Reranker Model":"<a href=\"https://huggingface.co/BAAI/bge-reranker-large\" target=\"_blank\">BAAI/bge-reranker-large</a>","Chunk Size":256,"MRR@10":0.3934,"Hit@10":0.6506,"Accuracy":0.28}
4
+ {"Framework":"naive_RAG","Chat Model":"<a href=\"https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1\" target=\"_blank\">mistralai\/Mixtral-8x7B-Instruct-v0.1</a>","Embedding Model":"<a href=\"https://docs.voyageai.com/docs/embeddings\" target=\"_blank\">voyage-02</a>","Reranker Model":"<a href=\"https://huggingface.co/BAAI/bge-reranker-large\" target=\"_blank\">BAAI/bge-reranker-large</a>","Chunk Size":256,"MRR@10":0.3934,"Hit@10":0.6506,"Accuracy":0.32}
5
+ {"Framework":"naive_RAG","Chat Model":"<a href=\"https://www.anthropic.com/news/claude-2-1\" target=\"_blank\">Claude-2.1</a>","Embedding Model":"<a href=\"https://docs.voyageai.com/docs/embeddings\" target=\"_blank\">voyage-02</a>","Reranker Model":"<a href=\"https://huggingface.co/BAAI/bge-reranker-large\" target=\"_blank\">BAAI/bge-reranker-large</a>","Chunk Size":256,"MRR@10":0.3934,"Hit@10":0.6506,"Accuracy": 0.52}
6
+ {"Framework":"naive_RAG","Chat Model":"<a href=\"https://ai.google/discover/palm2/\" target=\"_blank\">Google-PaLM</a>","Embedding Model":"<a href=\"https://docs.voyageai.com/docs/embeddings\" target=\"_blank\">voyage-02</a>","Reranker Model":"<a href=\"https://huggingface.co/BAAI/bge-reranker-large\" target=\"_blank\">BAAI/bge-reranker-large</a>","Chunk Size":256,"MRR@10":0.3934,"Hit@10":0.6506,"Accuracy": 0.47}
note.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# HTML/Markdown text shown to users who want to report their own results:
# where to send them and which records the submitted archive must contain.
# The checklist items are numbered consecutively across both groups.
SUBMISSION = """For the results report, please send the results to <strong>ytangch@connect.ust.hk</strong><br><br>
The experimental results should be a .zip file containing the test metrics and experimental records (such as the retrieved chunks and LLM QA process records).<br>

<strong>Required Record:</strong>
* readme.md
```
1. Framework
2. Chat Model
3. Base Model
4. Chunk Size (if no chunk, please fill with none)
5. MRR@10,Hit@10,Accuracy
```
* additional support
```
6. Retrieved Record For Each Query (in .json)
7. QA Record For Each Query (in .json)
```
"""
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  fuzzywuzzy
2
  st-gsheets-connection
 
 
1
  fuzzywuzzy
2
  st-gsheets-connection
3
+ streamlit-aggrid