github-actions[bot] committed on
Commit
3fcd79f
·
1 Parent(s): 24ef548

🚀 Auto-deploy from GitHub (906f3b9)

Browse files
Files changed (1) hide show
  1. main.py +5 -41
main.py CHANGED
@@ -237,7 +237,6 @@ def get_client() -> InferenceClient:
237
  _zsc_client = InferenceClient(
238
  token=HF_TOKEN,
239
  timeout=60,
240
- api_url="https://router.huggingface.co/hf-inference",
241
  )
242
  logger.info("HF InferenceClient initialized (for zero-shot classification)")
243
  break
@@ -272,7 +271,7 @@ def call_hf_chat(
272
  raise RuntimeError("HF_TOKEN is not set")
273
 
274
  target_model = model or HF_MATH_MODEL_ID
275
- url = f"https://router.huggingface.co/hf-inference/models/{target_model}/v1/chat/completions"
276
  headers = {
277
  "Authorization": f"Bearer {HF_TOKEN}",
278
  "Content-Type": "application/json",
@@ -333,45 +332,10 @@ Student question:
333
 
334
 
335
  def call_math_tutor_llm(question: str) -> str:
336
- """Convenience wrapper: call the HF serverless model with the MathPulse tutor prompt."""
337
- if not HF_TOKEN:
338
- raise RuntimeError("HF_TOKEN is not set")
339
-
340
- url = f"https://router.huggingface.co/hf-inference/models/{HF_MATH_MODEL_ID}"
341
- payload = {
342
- "inputs": build_math_tutor_prompt(question),
343
- "parameters": {
344
- "max_new_tokens": 512,
345
- "temperature": 0.2,
346
- "top_p": 0.9,
347
- },
348
- }
349
- headers = {
350
- "Authorization": f"Bearer {HF_TOKEN}",
351
- "Content-Type": "application/json",
352
- }
353
-
354
- for attempt in range(3):
355
- resp = http_requests.post(url, headers=headers, json=payload, timeout=60)
356
- if resp.status_code == 503 and attempt < 2:
357
- time.sleep(3)
358
- continue
359
- if resp.status_code != 200:
360
- raise RuntimeError(f"HF Inference error {resp.status_code}: {resp.text}")
361
-
362
- data = resp.json()
363
- if isinstance(data, list) and len(data) > 0:
364
- generated = data[0].get("generated_text") or data[0].get("output_text")
365
- if generated:
366
- return generated.strip()
367
- elif isinstance(data, dict):
368
- generated = data.get("generated_text") or data.get("output_text")
369
- if generated:
370
- return generated.strip()
371
-
372
- raise RuntimeError(f"Unexpected HF response format: {data}")
373
-
374
- raise RuntimeError("HF Inference failed after retries")
375
 
376
 
377
  # โ”€โ”€โ”€ Request/Response Models โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
 
237
  _zsc_client = InferenceClient(
238
  token=HF_TOKEN,
239
  timeout=60,
 
240
  )
241
  logger.info("HF InferenceClient initialized (for zero-shot classification)")
242
  break
 
271
  raise RuntimeError("HF_TOKEN is not set")
272
 
273
  target_model = model or HF_MATH_MODEL_ID
274
+ url = "https://router.huggingface.co/v1/chat/completions"
275
  headers = {
276
  "Authorization": f"Bearer {HF_TOKEN}",
277
  "Content-Type": "application/json",
 
332
 
333
 
334
  def call_math_tutor_llm(question: str) -> str:
335
+ """Convenience wrapper: call the HF serverless model with the MathPulse tutor prompt via chat completions."""
336
+ prompt = build_math_tutor_prompt(question)
337
+ messages = [{"role": "user", "content": prompt}]
338
+ return call_hf_chat(messages, max_tokens=512, temperature=0.2, top_p=0.9)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
 
340
 
341
  # โ”€โ”€โ”€ Request/Response Models โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€