HeTalksInMaths committed
Commit 4663c58 · 1 Parent(s): 29ce16b

Add progressive database expansion feature


- Initial build: 5K questions (~3-5 min, fast first launch)
- Expand button: Add 5K more on demand (~2-3 min per click)
- Users can expand to the full 12K questions progressively (offset-based batching, sketched below)
- Database stats show current size and remaining questions
- Net effect: fast first launch, with optional expansion to full coverage
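
The expansion relies on a simple offset scheme: the vector store's current count doubles as the next dataset index, so each click indexes the following contiguous slice of the MMLU-Pro test split. A minimal standalone sketch of that arithmetic (the helper name `next_batch_bounds` and the 12,032 total are illustrative; the app reads the real total from the loaded dataset):

```python
def next_batch_bounds(already_indexed: int, batch_size: int = 5000,
                      total_available: int = 12032) -> tuple[int, int]:
    """Return the (start, end) dataset indices for the next expansion batch."""
    start = already_indexed                         # DB count doubles as the offset
    end = min(start + batch_size, total_available)  # never run past the dataset
    return start, end

# After the initial 5K build: the first click adds questions 5000..9999,
# the second adds 10000..12031, after which the database is complete.
print(next_batch_bounds(5000))   # (5000, 10000)
print(next_batch_bounds(10000))  # (10000, 12032)
```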

Files changed (1)
  1. app.py +116 -13
app.py CHANGED
@@ -95,21 +95,11 @@ else:
     logger.info(f"✓ Loaded existing database with {current_count:,} questions")
 
 def analyze_prompt(prompt: str, k: int = 5) -> str:
-    """
-    Analyze a prompt and return difficulty assessment.
-
-    Args:
-        prompt: The user's prompt/question
-        k: Number of similar questions to retrieve
-
-    Returns:
-        Formatted analysis results
-    """
+    """Analyze a prompt and return difficulty assessment."""
     if not prompt.strip():
         return "Please enter a prompt to analyze."
 
     try:
-        # Query the vector database
         result = db.query_similar_questions(prompt, k=k)
 
         # Format results
@@ -130,7 +120,6 @@ def analyze_prompt(prompt: str, k: int = 5) -> str:
         output.append(f" - Similarity: {q['similarity']:.3f}")
         output.append("")
 
-        # Get current database size
         total_questions = db.collection.count()
         output.append(f"*Analyzed using {k} most similar questions from {total_questions:,} benchmark questions*")
 
@@ -139,11 +128,113 @@ def analyze_prompt(prompt: str, k: int = 5) -> str:
     except Exception as e:
         return f"Error analyzing prompt: {str(e)}"
 
+
+def expand_database(batch_size: int = 5000) -> str:
+    """Expand the database by adding another batch of questions."""
+    try:
+        from datasets import load_dataset
+        from benchmark_vector_db import BenchmarkQuestion
+        import random
+
+        current_count = db.collection.count()
+
+        # Load full MMLU-Pro test dataset
+        logger.info("Loading MMLU-Pro test dataset...")
+        test_dataset = load_dataset("TIGER-Lab/MMLU-Pro", split="test")
+        total_available = len(test_dataset)
+
+        # Figure out which questions we haven't indexed yet
+        # We'll use a simple offset approach
+        already_indexed = current_count
+        remaining = total_available - already_indexed
+
+        if remaining <= 0:
+            return f"✅ Database is complete! All {total_available:,} questions indexed."
+
+        # Sample next batch
+        start_idx = already_indexed
+        end_idx = min(start_idx + batch_size, total_available)
+        batch_questions = []
+
+        logger.info(f"Expanding database: adding questions {start_idx} to {end_idx}...")
+
+        for idx in range(start_idx, end_idx):
+            item = test_dataset[idx]
+            question = BenchmarkQuestion(
+                question_id=f"mmlu_pro_test_{idx}",
+                source_benchmark="MMLU_Pro",
+                domain=item.get('category', 'unknown').lower(),
+                question_text=item['question'],
+                correct_answer=item['answer'],
+                choices=item.get('options', []),
+                success_rate=0.45,
+                difficulty_score=0.55,
+                difficulty_label="Hard",
+                num_models_tested=0
+            )
+            batch_questions.append(question)
+
+        # Index the batch
+        logger.info(f"Indexing {len(batch_questions)} new questions...")
+        db.index_questions(batch_questions)
+
+        new_count = db.collection.count()
+        still_remaining = total_available - new_count
+
+        result = f"✅ Successfully added {len(batch_questions)} questions!\n\n"
+        result += f"**Database Stats:**\n"
+        result += f"- Total Questions: {new_count:,}\n"
+        result += f"- Just Added: {len(batch_questions)}\n"
+        result += f"- Remaining: {still_remaining:,}\n\n"
+
+        if still_remaining > 0:
+            result += f"Click 'Expand Database' again to add {min(batch_size, still_remaining)} more questions."
+        else:
+            result += f"🎉 Database is now complete with all {total_available:,} questions!"
+
+        return result
+
+    except Exception as e:
+        logger.error(f"Expansion failed: {e}")
+        return f"❌ Error expanding database: {str(e)}"
+
+
+def get_database_info() -> str:
+    """Get current database statistics."""
+    try:
+        current_count = db.collection.count()
+
+        # Estimate total available (MMLU-Pro test has ~12K)
+        total_available = 12032
+        remaining = total_available - current_count
+
+        info = f"### 📊 Database Status\n\n"
+        info += f"**Current Size:** {current_count:,} questions\n"
+        info += f"**Available:** {total_available:,} questions\n"
+        info += f"**Remaining:** {max(0, remaining):,} questions\n\n"
+
+        if remaining > 0:
+            info += f"💡 Click 'Expand Database' to add 5,000 more questions (takes ~2-3 min)"
+        else:
+            info += f"✅ Database is complete!"
+
+        return info
+    except Exception as e:
+        return f"Error getting database info: {str(e)}"
+
 # Create Gradio interface
 with gr.Blocks(title="ToGMAL Prompt Difficulty Analyzer") as demo:
     gr.Markdown("# 🧠 ToGMAL Prompt Difficulty Analyzer")
     gr.Markdown("Enter any prompt to see how difficult it is for current LLMs based on real benchmark data.")
 
+    # Database expansion section
+    with gr.Accordion("📊 Database Management", open=False):
+        db_info = gr.Markdown(get_database_info())
+        with gr.Row():
+            expand_btn = gr.Button("🚀 Expand Database (+5K questions)", variant="secondary")
+            refresh_btn = gr.Button("🔄 Refresh Stats", variant="secondary")
+        expand_output = gr.Markdown()
+
     with gr.Row():
         with gr.Column():
             prompt_input = gr.Textbox(
@@ -158,7 +249,7 @@ with gr.Blocks(title="ToGMAL Prompt Difficulty Analyzer") as demo:
                 step=1,
                 label="Number of similar questions to show"
             )
-            submit_btn = gr.Button("Analyze Difficulty")
+            submit_btn = gr.Button("Analyze Difficulty", variant="primary")
 
         with gr.Column():
             result_output = gr.Markdown(label="Analysis Results")
@@ -189,6 +280,18 @@ with gr.Blocks(title="ToGMAL Prompt Difficulty Analyzer") as demo:
         inputs=[prompt_input, k_slider],
         outputs=result_output
     )
+
+    expand_btn.click(
+        fn=expand_database,
+        inputs=[],
+        outputs=expand_output
+    )
+
+    refresh_btn.click(
+        fn=get_database_info,
+        inputs=[],
+        outputs=db_info
+    )
 
 if __name__ == "__main__":
     demo.launch(share=True, server_port=7861)