bhardwaj08sarthak committed on
Commit
423136d
·
verified ·
1 Parent(s): b087291

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -76
app.py CHANGED
@@ -3,64 +3,74 @@ import json
3
  import gradio as gr
4
  from huggingface_hub import InferenceClient
5
  from smolagents import CodeAgent, InferenceClientModel, tool
6
- import spaces
7
  from level_classifier_tool import (
8
  classify_levels_phrases,
9
  HFEmbeddingBackend,
10
  build_phrase_index
11
  )
 
 
12
  BLOOMS_PHRASES = {
13
- "Remember": [
14
- "define", "list", "recall", "identify", "state", "label", "name", "recognize", "find", "select", "match", "choose", "give", "write", "tell", "show"
 
15
  ],
16
- "Understand": [
17
- "classify", "interpret", "summarize", "explain", "estimate", "describe", "discuss", "predict", "paraphrase", "restate", "illustrate", "compare", "contrast", "report"
 
18
  ],
19
- "Apply": [
20
- "apply", "solve", "use", "demonstrate", "calculate", "implement", "perform", "execute", "carry out", "practice", "employ", "sketch"
 
21
  ],
22
- "Analyze": [
23
- "analyze", "differentiate", "organize", "structure", "break down", "distinguish", "dissect", "examine", "compare", "contrast", "attribute", "investigate"
 
24
  ],
25
- "Evaluate": [
26
- "evaluate", "judge", "critique", "assess", "defend", "argue", "select", "support", "appraise", "recommend", "conclude", "review"
 
27
  ],
28
- "Create": [
29
- "create", "design", "compose", "plan", "construct", "produce", "devise", "generate", "develop", "formulate", "invent", "build"
 
30
  ]
31
  }
32
 
33
  DOK_PHRASES = {
34
  "DOK1": [
35
  "define", "list", "recall", "compute", "identify", "state", "label", "how many",
36
- "name", "recognize", "find", "determine", "select", "match", "choose", "give",
37
  "write", "tell", "show", "point out"
38
  ],
39
  "DOK2": [
40
  "classify", "interpret", "estimate", "organise", "summarise", "explain", "solve",
41
- "categorize", "group", "compare", "contrast", "distinguish", "make observations",
42
- "collect data", "display data", "arrange", "sort", "paraphrase", "restate", "predict",
43
  "approximate", "demonstrate", "illustrate", "describe", "analyze data"
44
  ],
45
  "DOK3": [
46
  "justify", "analyze", "generalise", "compare", "construct", "investigate",
47
- "support", "defend", "argue", "examine", "differentiate", "criticize", "debate",
48
- "test", "experiment", "hypothesize", "draw conclusions", "break down", "dissect",
49
  "probe", "explore", "develop", "formulate"
50
  ],
51
  "DOK4": [
52
  "design", "synthesize", "model", "prove", "evaluate system", "critique", "create",
53
- "compose", "plan", "invent", "devise", "generate", "build", "construct", "produce",
54
- "formulate", "improve", "revise", "assess", "appraise", "judge", "recommend",
55
  "predict outcome", "simulate"
56
  ]
57
  }
58
 
59
- # Prebuild embeddings once
60
- _backend = HFEmbeddingBackend(model_name="sentence-transformers/all-MiniLM-L6-v2").to('cuda')
61
  _BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
62
  _DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
63
 
 
 
64
  @tool
65
  def classify_and_score(
66
  question: str,
@@ -69,7 +79,7 @@ def classify_and_score(
69
  agg: str = "max"
70
  ) -> dict:
71
  """Classify a question against Bloom’s and DOK targets and return guidance.
72
-
73
  Args:
74
  question: The question text to evaluate for cognitive demand.
75
  target_bloom: Target Bloom’s level or range. Accepts exact (e.g., "Analyze")
@@ -78,7 +88,7 @@ def classify_and_score(
78
  or span (e.g., "DOK2-DOK3").
79
  agg: Aggregation method over phrase similarities within a level
80
  (choices: "mean", "max", "topk_mean").
81
-
82
  Returns:
83
  A dictionary with:
84
  ok: True if both Bloom’s and DOK match the targets.
@@ -97,21 +107,40 @@ def classify_and_score(
97
  )
98
 
99
  def _parse_target_bloom(t: str):
100
- order = ["Remember","Understand","Apply","Analyze","Evaluate","Create"]
101
  if t.endswith("+"):
102
  base = t[:-1]
 
 
103
  return set(order[order.index(base):])
 
 
104
  return {t}
105
 
106
  def _parse_target_dok(t: str):
107
- order = ["DOK1","DOK2","DOK3","DOK4"]
108
  if "-" in t:
109
  lo, hi = t.split("-")
110
- return set(order[order.index(lo):order.index(hi)+1])
 
 
 
 
111
  return {t}
112
 
113
- bloom_target_set = _parse_target_bloom(target_bloom)
114
- dok_target_set = _parse_target_dok(target_dok)
 
 
 
 
 
 
 
 
 
 
 
115
 
116
  bloom_best = res["blooms"]["best_level"]
117
  dok_best = res["dok"]["best_level"]
@@ -119,16 +148,19 @@ def classify_and_score(
119
  bloom_ok = bloom_best in bloom_target_set
120
  dok_ok = dok_best in dok_target_set
121
 
 
 
 
122
  feedback_parts = []
123
  if not bloom_ok:
124
  feedback_parts.append(
125
- f"Shift Bloom’s from {bloom_best} toward {sorted(bloom_target_set)}. "
126
- f"Top cues: {res['blooms']['top_phrases'].get(bloom_best, [])[:3]}"
127
  )
128
  if not dok_ok:
129
  feedback_parts.append(
130
- f"Shift DOK from {dok_best} toward {sorted(dok_target_set)}. "
131
- f"Top cues: {res['dok']['top_phrases'].get(dok_best, [])[:3]}"
132
  )
133
 
134
  return {
@@ -144,25 +176,18 @@ def classify_and_score(
144
 
145
 
146
  # ------------------------ Agent setup with timeout ------------------------
147
- def make_agent(hf_token: str, model_id: str, provider: str, timeout: int, temperature: float, max_tokens: int):
148
  client = InferenceClient(
149
  model=model_id,
150
- provider=provider,
151
  timeout=timeout,
152
- token=hf_token if hf_token else None,
153
  )
154
-
155
  model = InferenceClientModel(client=client)
156
  agent = CodeAgent(model=model, tools=[classify_and_score])
157
- agent._ui_params = {"temperature": temperature, "max_tokens": max_tokens} # attach for reference
 
158
  return agent
159
- @spaces.GPU(duration=20)
160
- def load_model():
161
- transformers_model = TransformersModel(
162
- model_id='swiss-ai/Apertus-70B-Instruct-2509',
163
- device_map="auto"
164
- ).to("cuda")
165
- return transformers_model
166
 
167
  # ------------------------ Agent task template -----------------------------
168
  TASK_TMPL = '''You generate {subject} question candidates for {grade} on "{topic}".
@@ -185,7 +210,33 @@ If you output JSON, ensure it is valid JSON (no trailing commas, use double quot
185
  '''
186
 
187
 
188
- # ------------------------ Gradio glue ------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  def run_pipeline(
190
  hf_token,
191
  topic,
@@ -195,16 +246,14 @@ def run_pipeline(
195
  target_dok,
196
  attempts,
197
  model_id,
198
- provider,
199
  timeout,
200
  temperature,
201
  max_tokens
202
  ):
203
- # Build agent per run (or cache if you prefer)
204
  agent = make_agent(
205
- hf_token=hf_token.strip(),
206
  model_id=model_id,
207
- provider=provider,
208
  timeout=int(timeout),
209
  temperature=float(temperature),
210
  max_tokens=int(max_tokens),
@@ -221,25 +270,23 @@ def run_pipeline(
221
 
222
  # The agent will internally call the tool
223
  try:
224
- result_text = agent.run(task, max_steps=int(attempts)*4)
225
  except Exception as e:
226
  result_text = f"ERROR: {e}"
227
 
228
  # Try to extract final JSON
229
  final_json = ""
230
- try:
231
- # find JSON object in result_text (simple heuristic)
232
- start = result_text.find("{")
233
- end = result_text.rfind("}")
234
- if start != -1 and end != -1 and end > start:
235
- candidate = result_text[start:end+1]
236
  final_json = json.dumps(json.loads(candidate), indent=2)
237
- except Exception:
238
- final_json = ""
239
 
240
  return final_json, result_text
241
 
242
 
 
243
  with gr.Blocks() as demo:
244
  gr.Markdown("# Agent + Tool: Generate Questions to Target Difficulty")
245
  gr.Markdown(
@@ -248,54 +295,61 @@ with gr.Blocks() as demo:
248
  )
249
 
250
  with gr.Accordion("API Settings", open=False):
251
- hf_token = gr.Textbox(label="Hugging Face Token (required if the endpoint needs auth)", type="password")
252
- model_id = gr.Textbox(value=transformers_model, label="Model ID")
253
- provider = gr.Textbox(value="novita", label="Provider")
 
 
 
 
 
254
  timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s)")
255
 
256
  with gr.Row():
257
  topic = gr.Textbox(value="Fractions", label="Topic")
258
  grade = gr.Dropdown(
259
- choices=["Grade 1","Grade 2","Grade 3","Grade4","Grade 5","Grade 6","Grade 7","Grade 8","Grade 9",
260
- "Grade 10","Grade 11","Grade 12","Under Graduate","Post Graduate"],
 
 
 
 
261
  value="Grade 7",
262
  label="Grade"
263
  )
264
- subject= gr.Textbox(value="Math", label="Subject")
265
 
266
  with gr.Row():
267
  target_bloom = gr.Dropdown(
268
- choices=["Remember","Understand","Apply","Analyze","Evaluate","Create"],
269
  value="Analyze",
270
  label="Target Bloom’s"
271
  )
272
  target_dok = gr.Dropdown(
273
- choices=["DOK1","DOK2","DOK3","DOK4","DOK1-DOK2","DOK2-DOK3","DOK3-DOK4"],
274
  value="DOK2-DOK3",
275
  label="Target Depth of Knowledge"
276
  )
277
  attempts = gr.Slider(1, 8, value=5, step=1, label="Max Attempts")
278
 
279
- with gr.Accordion("⚙️ Generation Controls", open=False):
280
  temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature")
281
  max_tokens = gr.Slider(64, 1024, value=300, step=16, label="Max Tokens")
282
 
283
- run_btn = gr.Button("Run Agent 🚀")
284
 
285
  final_json = gr.Code(label="Final Candidate (JSON if detected)", language="json")
286
  transcript = gr.Textbox(label="Agent Transcript", lines=18)
287
 
288
  run_btn.click(
289
  fn=run_pipeline,
290
- inputs=[hf_token, topic, grade, subject, target_bloom, target_dok, attempts, model_id, provider, timeout, temperature, max_tokens],
 
 
 
 
291
  outputs=[final_json, transcript]
292
  )
293
 
294
  if __name__ == "__main__" or os.getenv("SYSTEM") == "spaces":
295
- try:
296
- load_model() # triggers GPU allocation during startup
297
- except Exception as e:
298
- # don't crash the app if warmup fails; logs will show details
299
- print("Warmup failed:", e)
300
  demo.launch()
301
-
 
3
  import gradio as gr
4
  from huggingface_hub import InferenceClient
5
  from smolagents import CodeAgent, InferenceClientModel, tool
6
+
7
  from level_classifier_tool import (
8
  classify_levels_phrases,
9
  HFEmbeddingBackend,
10
  build_phrase_index
11
  )
12
+
13
+ # ------------------------ Taxonomy phrases ------------------------
14
  BLOOMS_PHRASES = {
15
+ "Remember": [
16
+ "define", "list", "recall", "identify", "state", "label", "name", "recognize", "find",
17
+ "select", "match", "choose", "give", "write", "tell", "show"
18
  ],
19
+ "Understand": [
20
+ "classify", "interpret", "summarize", "explain", "estimate", "describe", "discuss",
21
+ "predict", "paraphrase", "restate", "illustrate", "compare", "contrast", "report"
22
  ],
23
+ "Apply": [
24
+ "apply", "solve", "use", "demonstrate", "calculate", "implement", "perform",
25
+ "execute", "carry out", "practice", "employ", "sketch"
26
  ],
27
+ "Analyze": [
28
+ "analyze", "differentiate", "organize", "structure", "break down", "distinguish",
29
+ "dissect", "examine", "compare", "contrast", "attribute", "investigate"
30
  ],
31
+ "Evaluate": [
32
+ "evaluate", "judge", "critique", "assess", "defend", "argue", "select", "support",
33
+ "appraise", "recommend", "conclude", "review"
34
  ],
35
+ "Create": [
36
+ "create", "design", "compose", "plan", "construct", "produce", "devise", "generate",
37
+ "develop", "formulate", "invent", "build"
38
  ]
39
  }
40
 
41
  DOK_PHRASES = {
42
  "DOK1": [
43
  "define", "list", "recall", "compute", "identify", "state", "label", "how many",
44
+ "name", "recognize", "find", "determine", "select", "match", "choose", "give",
45
  "write", "tell", "show", "point out"
46
  ],
47
  "DOK2": [
48
  "classify", "interpret", "estimate", "organise", "summarise", "explain", "solve",
49
+ "categorize", "group", "compare", "contrast", "distinguish", "make observations",
50
+ "collect data", "display data", "arrange", "sort", "paraphrase", "restate", "predict",
51
  "approximate", "demonstrate", "illustrate", "describe", "analyze data"
52
  ],
53
  "DOK3": [
54
  "justify", "analyze", "generalise", "compare", "construct", "investigate",
55
+ "support", "defend", "argue", "examine", "differentiate", "criticize", "debate",
56
+ "test", "experiment", "hypothesize", "draw conclusions", "break down", "dissect",
57
  "probe", "explore", "develop", "formulate"
58
  ],
59
  "DOK4": [
60
  "design", "synthesize", "model", "prove", "evaluate system", "critique", "create",
61
+ "compose", "plan", "invent", "devise", "generate", "build", "construct", "produce",
62
+ "formulate", "improve", "revise", "assess", "appraise", "judge", "recommend",
63
  "predict outcome", "simulate"
64
  ]
65
  }
66
 
67
+ # ------------------------ Prebuild embeddings once ------------------------
68
+ _backend = HFEmbeddingBackend(model_name="sentence-transformers/all-MiniLM-L6-v2")
69
  _BLOOM_INDEX = build_phrase_index(_backend, BLOOMS_PHRASES)
70
  _DOK_INDEX = build_phrase_index(_backend, DOK_PHRASES)
71
 
72
+
73
+ # ------------------------ Tool: classify and score ------------------------
74
  @tool
75
  def classify_and_score(
76
  question: str,
 
79
  agg: str = "max"
80
  ) -> dict:
81
  """Classify a question against Bloom’s and DOK targets and return guidance.
82
+
83
  Args:
84
  question: The question text to evaluate for cognitive demand.
85
  target_bloom: Target Bloom’s level or range. Accepts exact (e.g., "Analyze")
 
88
  or span (e.g., "DOK2-DOK3").
89
  agg: Aggregation method over phrase similarities within a level
90
  (choices: "mean", "max", "topk_mean").
91
+
92
  Returns:
93
  A dictionary with:
94
  ok: True if both Bloom’s and DOK match the targets.
 
107
  )
108
 
109
  def _parse_target_bloom(t: str):
110
+ order = ["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"]
111
  if t.endswith("+"):
112
  base = t[:-1]
113
+ if base not in order:
114
+ raise ValueError(f"Invalid Bloom target '{t}'")
115
  return set(order[order.index(base):])
116
+ if t not in order:
117
+ raise ValueError(f"Invalid Bloom target '{t}'")
118
  return {t}
119
 
120
  def _parse_target_dok(t: str):
121
+ order = ["DOK1", "DOK2", "DOK3", "DOK4"]
122
  if "-" in t:
123
  lo, hi = t.split("-")
124
+ if lo not in order or hi not in order or order.index(lo) > order.index(hi):
125
+ raise ValueError(f"Invalid DOK range '{t}'")
126
+ return set(order[order.index(lo):order.index(hi) + 1])
127
+ if t not in order:
128
+ raise ValueError(f"Invalid DOK target '{t}'")
129
  return {t}
130
 
131
+ try:
132
+ bloom_target_set = _parse_target_bloom(target_bloom)
133
+ dok_target_set = _parse_target_dok(target_dok)
134
+ except Exception as e:
135
+ return {
136
+ "ok": False,
137
+ "measured": {},
138
+ "feedback": (
139
+ f"Invalid targets: {e}. Use Bloom in "
140
+ "{Remember, Understand, Apply, Analyze, Evaluate, Create} "
141
+ "and DOK in {DOK1..DOK4} or ranges like 'DOK2-DOK3'."
142
+ ),
143
+ }
144
 
145
  bloom_best = res["blooms"]["best_level"]
146
  dok_best = res["dok"]["best_level"]
 
148
  bloom_ok = bloom_best in bloom_target_set
149
  dok_ok = dok_best in dok_target_set
150
 
151
+ top_bloom_phrases = res["blooms"].get("top_phrases", {})
152
+ top_dok_phrases = res["dok"].get("top_phrases", {})
153
+
154
  feedback_parts = []
155
  if not bloom_ok:
156
  feedback_parts.append(
157
+ f"Shift Bloom’s from {bloom_best} toward {sorted(list(bloom_target_set))}. "
158
+ f"Top cues: {top_bloom_phrases.get(bloom_best, [])[:3]}"
159
  )
160
  if not dok_ok:
161
  feedback_parts.append(
162
+ f"Shift DOK from {dok_best} toward {sorted(list(dok_target_set))}. "
163
+ f"Top cues: {top_dok_phrases.get(dok_best, [])[:3]}"
164
  )
165
 
166
  return {
 
176
 
177
 
178
  # ------------------------ Agent setup with timeout ------------------------
179
+ def make_agent(hf_token: str, model_id: str, timeout: int, temperature: float, max_tokens: int):
180
  client = InferenceClient(
181
  model=model_id,
 
182
  timeout=timeout,
183
+ token=hf_token or None,
184
  )
 
185
  model = InferenceClientModel(client=client)
186
  agent = CodeAgent(model=model, tools=[classify_and_score])
187
+ # Not used by the agent core, but helpful for debugging/visibility
188
+ agent._ui_params = {"temperature": temperature, "max_tokens": max_tokens}
189
  return agent
190
+
 
 
 
 
 
 
191
 
192
  # ------------------------ Agent task template -----------------------------
193
  TASK_TMPL = '''You generate {subject} question candidates for {grade} on "{topic}".
 
210
  '''
211
 
212
 
213
+ # ------------------------ Utility: robust JSON extractor ------------------
214
+ def extract_top_level_json(s: str) -> str:
215
+ """
216
+ Extract the first top-level JSON object from a string by tracking braces.
217
+ Returns the JSON string if found, else "".
218
+ """
219
+ start = s.find("{")
220
+ if start == -1:
221
+ return ""
222
+ depth = 0
223
+ for i in range(start, len(s)):
224
+ if s[i] == "{":
225
+ depth += 1
226
+ elif s[i] == "}":
227
+ depth -= 1
228
+ if depth == 0:
229
+ candidate = s[start:i + 1]
230
+ try:
231
+ # validate
232
+ json.loads(candidate)
233
+ return candidate
234
+ except Exception:
235
+ return ""
236
+ return ""
237
+
238
+
239
+ # ------------------------ Pipeline ---------------------------------------
240
  def run_pipeline(
241
  hf_token,
242
  topic,
 
246
  target_dok,
247
  attempts,
248
  model_id,
 
249
  timeout,
250
  temperature,
251
  max_tokens
252
  ):
253
+ # Build agent per run
254
  agent = make_agent(
255
+ hf_token=(hf_token or "").strip(),
256
  model_id=model_id,
 
257
  timeout=int(timeout),
258
  temperature=float(temperature),
259
  max_tokens=int(max_tokens),
 
270
 
271
  # The agent will internally call the tool
272
  try:
273
+ result_text = agent.run(task, max_steps=int(attempts) * 4)
274
  except Exception as e:
275
  result_text = f"ERROR: {e}"
276
 
277
  # Try to extract final JSON
278
  final_json = ""
279
+ candidate = extract_top_level_json(result_text or "")
280
+ if candidate:
281
+ try:
 
 
 
282
  final_json = json.dumps(json.loads(candidate), indent=2)
283
+ except Exception:
284
+ final_json = ""
285
 
286
  return final_json, result_text
287
 
288
 
289
+ # ------------------------ Gradio UI --------------------------------------
290
  with gr.Blocks() as demo:
291
  gr.Markdown("# Agent + Tool: Generate Questions to Target Difficulty")
292
  gr.Markdown(
 
295
  )
296
 
297
  with gr.Accordion("API Settings", open=False):
298
+ hf_token = gr.Textbox(
299
+ label="Hugging Face Token (required if the endpoint needs auth)",
300
+ type="password"
301
+ )
302
+ model_id = gr.Textbox(
303
+ value="swiss-ai/Apertus-70B-Instruct-2509",
304
+ label="Model ID"
305
+ )
306
  timeout = gr.Slider(5, 120, value=30, step=1, label="Timeout (s)")
307
 
308
  with gr.Row():
309
  topic = gr.Textbox(value="Fractions", label="Topic")
310
  grade = gr.Dropdown(
311
+ choices=[
312
+ "Grade 1", "Grade 2", "Grade 3", "Grade 4", "Grade 5", "Grade 6",
313
+ "Grade 7", "Grade 8", "Grade 9",
314
+ "Grade 10", "Grade 11", "Grade 12",
315
+ "Under Graduate", "Post Graduate"
316
+ ],
317
  value="Grade 7",
318
  label="Grade"
319
  )
320
+ subject = gr.Textbox(value="Math", label="Subject")
321
 
322
  with gr.Row():
323
  target_bloom = gr.Dropdown(
324
+ choices=["Remember", "Understand", "Apply", "Analyze", "Evaluate", "Create"],
325
  value="Analyze",
326
  label="Target Bloom’s"
327
  )
328
  target_dok = gr.Dropdown(
329
+ choices=["DOK1", "DOK2", "DOK3", "DOK4", "DOK1-DOK2", "DOK2-DOK3", "DOK3-DOK4"],
330
  value="DOK2-DOK3",
331
  label="Target Depth of Knowledge"
332
  )
333
  attempts = gr.Slider(1, 8, value=5, step=1, label="Max Attempts")
334
 
335
+ with gr.Accordion("Generation Controls", open=False):
336
  temperature = gr.Slider(0.0, 1.5, value=0.7, step=0.1, label="Temperature")
337
  max_tokens = gr.Slider(64, 1024, value=300, step=16, label="Max Tokens")
338
 
339
+ run_btn = gr.Button("Run Agent")
340
 
341
  final_json = gr.Code(label="Final Candidate (JSON if detected)", language="json")
342
  transcript = gr.Textbox(label="Agent Transcript", lines=18)
343
 
344
  run_btn.click(
345
  fn=run_pipeline,
346
+ inputs=[
347
+ hf_token, topic, grade, subject,
348
+ target_bloom, target_dok, attempts,
349
+ model_id, timeout, temperature, max_tokens
350
+ ],
351
  outputs=[final_json, transcript]
352
  )
353
 
354
  if __name__ == "__main__" or os.getenv("SYSTEM") == "spaces":
 
 
 
 
 
355
  demo.launch()