Chris4K committed on
Commit
1754322
1 Parent(s): e9d5e9c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -19
app.py CHANGED
@@ -155,18 +155,8 @@ def calculate_statistics(results, search_time):
155
  "search_time": search_time
156
  }
157
 
158
- def format_results(results, stats):
159
- df = pd.DataFrame([
160
- {
161
- "Content": doc.page_content,
162
- "Source": doc.metadata.get("source", "Unknown"),
163
- "Relevance Score": doc.metadata.get("score", "N/A")
164
- } for doc in results
165
- ])
166
-
167
- formatted_stats = pd.DataFrame([stats])
168
-
169
- return gr.DataFrame(df), gr.DataFrame(formatted_stats)
170
 
171
  def compare_embeddings(file, query, model_types, model_names, split_strategy, chunk_size, overlap_size, custom_separators, vector_store_type, search_type, top_k):
172
  all_results = []
@@ -195,11 +185,24 @@ def compare_embeddings(file, query, model_types, model_names, split_strategy, ch
195
  stats = calculate_statistics(results, search_time)
196
  stats["model"] = f"{model_type} - {model_name}"
197
 
198
- all_results.append(results)
199
- all_stats.append(stats)
 
200
 
201
- # Flatten the return values to match Gradio's expectation for each output
202
- return [item for sublist in all_results for item in sublist] + [item for sublist in all_stats for item in sublist]
 
 
 
 
 
 
 
 
 
 
 
 
203
 
204
  # Gradio interface
205
  iface = gr.Interface(
@@ -217,9 +220,10 @@ iface = gr.Interface(
217
  gr.Radio(choices=["similarity", "mmr"], label="Search Type", value="similarity"),
218
  gr.Slider(1, 10, step=1, value=5, label="Top K")
219
  ],
220
- #outputs=[gr.DataFrame(label="Results"), gr.DataFrame(label="Statistics")] * len(MODELS),
221
- outputs = [gr.Text(label=f"Results {i+1}") for i in range(len(MODELS)-1)] + \
222
- [gr.Text(label=f"Statistics {i+1}") for i in range(len(MODELS)-1)],
 
223
  title="Embedding Comparison Tool",
224
  description="Compare different embedding models and retrieval strategies"
225
  )
 
155
  "search_time": search_time
156
  }
157
 
158
+ import gradio as gr
159
+ import pandas as pd
 
 
 
 
 
 
 
 
 
 
160
 
161
  def compare_embeddings(file, query, model_types, model_names, split_strategy, chunk_size, overlap_size, custom_separators, vector_store_type, search_type, top_k):
162
  all_results = []
 
185
  stats = calculate_statistics(results, search_time)
186
  stats["model"] = f"{model_type} - {model_name}"
187
 
188
+ formatted_results, formatted_stats = format_results(results, stats)
189
+ all_results.append(formatted_results)
190
+ all_stats.append(formatted_stats)
191
 
192
+ return all_results + all_stats
193
+
194
+ def format_results(results, stats):
195
+ df = pd.DataFrame([
196
+ {
197
+ "Content": doc.page_content,
198
+ "Source": doc.metadata.get("source", "Unknown"),
199
+ "Relevance Score": doc.metadata.get("score", "N/A")
200
+ } for doc in results
201
+ ])
202
+
203
+ formatted_stats = pd.DataFrame([stats])
204
+
205
+ return df, formatted_stats
206
 
207
  # Gradio interface
208
  iface = gr.Interface(
 
220
  gr.Radio(choices=["similarity", "mmr"], label="Search Type", value="similarity"),
221
  gr.Slider(1, 10, step=1, value=5, label="Top K")
222
  ],
223
+ outputs=[
224
+ gr.Dataframe(label="Results"),
225
+ gr.Dataframe(label="Statistics")
226
+ ],
227
  title="Embedding Comparison Tool",
228
  description="Compare different embedding models and retrieval strategies"
229
  )