Spaces:
Running
Running
github-actions[bot] committed on
Commit ·
cb5a5af
1
Parent(s): d778d65
🚀 Auto-deploy backend from GitHub (7408e56)
Browse files- Dockerfile +1 -0
- config/env.sample +3 -3
- config/models.yaml +13 -14
- services/inference_client.py +6 -6
Dockerfile
CHANGED
|
@@ -23,6 +23,7 @@ RUN python -m pip install --upgrade pip setuptools wheel && \
|
|
| 23 |
|
| 24 |
# Copy only runtime sources to reduce invalidation surface.
|
| 25 |
COPY main.py /app/main.py
|
|
|
|
| 26 |
COPY analytics.py /app/analytics.py
|
| 27 |
COPY automation_engine.py /app/automation_engine.py
|
| 28 |
COPY services /app/services
|
|
|
|
| 23 |
|
| 24 |
# Copy only runtime sources to reduce invalidation surface.
|
| 25 |
COPY main.py /app/main.py
|
| 26 |
+
COPY startup_validation.py /app/startup_validation.py
|
| 27 |
COPY analytics.py /app/analytics.py
|
| 28 |
COPY automation_engine.py /app/automation_engine.py
|
| 29 |
COPY services /app/services
|
config/env.sample
CHANGED
|
@@ -38,8 +38,8 @@ INFERENCE_INTERACTIVE_TIMEOUT_SEC=55
|
|
| 38 |
INFERENCE_BACKGROUND_TIMEOUT_SEC=120
|
| 39 |
|
| 40 |
# model defaults
|
| 41 |
-
#
|
| 42 |
-
INFERENCE_MODEL_ID=
|
| 43 |
INFERENCE_MAX_NEW_TOKENS=640
|
| 44 |
INFERENCE_TEMPERATURE=0.2
|
| 45 |
INFERENCE_TOP_P=0.9
|
|
@@ -54,7 +54,7 @@ CHAT_STREAM_NO_TOKEN_TIMEOUT_SEC=25
|
|
| 54 |
CHAT_STREAM_TOTAL_TIMEOUT_SEC=120
|
| 55 |
# Optional: force quiz-generation model. Leave empty to use routing.task_model_map.quiz_generation.
|
| 56 |
HF_QUIZ_MODEL_ID=
|
| 57 |
-
HF_QUIZ_JSON_REPAIR_MODEL_ID=
|
| 58 |
|
| 59 |
# retry behavior
|
| 60 |
INFERENCE_MAX_RETRIES=3
|
|
|
|
| 38 |
INFERENCE_BACKGROUND_TIMEOUT_SEC=120
|
| 39 |
|
| 40 |
# model defaults
|
| 41 |
+
# Global default model for all tasks.
|
| 42 |
+
INFERENCE_MODEL_ID=Qwen/Qwen2.5-7B-Instruct
|
| 43 |
INFERENCE_MAX_NEW_TOKENS=640
|
| 44 |
INFERENCE_TEMPERATURE=0.2
|
| 45 |
INFERENCE_TOP_P=0.9
|
|
|
|
| 54 |
CHAT_STREAM_TOTAL_TIMEOUT_SEC=120
|
| 55 |
# Optional: force quiz-generation model. Leave empty to use routing.task_model_map.quiz_generation.
|
| 56 |
HF_QUIZ_MODEL_ID=
|
| 57 |
+
HF_QUIZ_JSON_REPAIR_MODEL_ID=Qwen/Qwen2.5-7B-Instruct
|
| 58 |
|
| 59 |
# retry behavior
|
| 60 |
INFERENCE_MAX_RETRIES=3
|
config/models.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
models:
|
| 2 |
primary:
|
| 3 |
-
id:
|
| 4 |
-
description:
|
| 5 |
max_new_tokens: 640
|
| 6 |
temperature: 0.25
|
| 7 |
top_p: 0.9
|
|
@@ -26,24 +26,23 @@ models:
|
|
| 26 |
|
| 27 |
routing:
|
| 28 |
task_model_map:
|
| 29 |
-
#
|
| 30 |
-
# Hard prompts can escalate
|
| 31 |
-
chat:
|
| 32 |
-
verify_solution:
|
| 33 |
-
lesson_generation:
|
| 34 |
-
quiz_generation:
|
| 35 |
-
learning_path:
|
| 36 |
-
daily_insight:
|
| 37 |
-
risk_classification:
|
| 38 |
-
risk_narrative:
|
| 39 |
|
| 40 |
task_fallback_model_map:
|
| 41 |
chat:
|
| 42 |
- meta-llama/Meta-Llama-3-70B-Instruct # Hard/fallback quality tier
|
| 43 |
- google/gemma-2-2b-it # Fast safety fallback
|
| 44 |
verify_solution:
|
| 45 |
-
-
|
| 46 |
-
- meta-llama/Meta-Llama-3-70B-Instruct # First fallback
|
| 47 |
- meta-llama/Llama-3.1-8B-Instruct # Second fallback
|
| 48 |
|
| 49 |
task_provider_map:
|
|
|
|
| 1 |
models:
|
| 2 |
primary:
|
| 3 |
+
id: Qwen/Qwen2.5-7B-Instruct
|
| 4 |
+
description: Global default instruction model for interactive Grade 11-12 math tutoring
|
| 5 |
max_new_tokens: 640
|
| 6 |
temperature: 0.25
|
| 7 |
top_p: 0.9
|
|
|
|
| 26 |
|
| 27 |
routing:
|
| 28 |
task_model_map:
|
| 29 |
+
# Global default: Qwen2.5-7B across all tasks.
|
| 30 |
+
# Hard prompts can still escalate via runtime policy in inference_client.
|
| 31 |
+
chat: Qwen/Qwen2.5-7B-Instruct
|
| 32 |
+
verify_solution: Qwen/Qwen2.5-7B-Instruct
|
| 33 |
+
lesson_generation: Qwen/Qwen2.5-7B-Instruct
|
| 34 |
+
quiz_generation: Qwen/Qwen2.5-7B-Instruct
|
| 35 |
+
learning_path: Qwen/Qwen2.5-7B-Instruct
|
| 36 |
+
daily_insight: Qwen/Qwen2.5-7B-Instruct
|
| 37 |
+
risk_classification: Qwen/Qwen2.5-7B-Instruct
|
| 38 |
+
risk_narrative: Qwen/Qwen2.5-7B-Instruct
|
| 39 |
|
| 40 |
task_fallback_model_map:
|
| 41 |
chat:
|
| 42 |
- meta-llama/Meta-Llama-3-70B-Instruct # Hard/fallback quality tier
|
| 43 |
- google/gemma-2-2b-it # Fast safety fallback
|
| 44 |
verify_solution:
|
| 45 |
+
- meta-llama/Meta-Llama-3-70B-Instruct # Higher-capacity fallback
|
|
|
|
| 46 |
- meta-llama/Llama-3.1-8B-Instruct # Second fallback
|
| 47 |
|
| 48 |
task_provider_map:
|
services/inference_client.py
CHANGED
|
@@ -107,7 +107,7 @@ class InferenceClient:
|
|
| 107 |
self.pro_route_header_name = os.getenv("INFERENCE_PRO_ROUTE_HEADER_NAME", "")
|
| 108 |
self.pro_route_header_value = os.getenv("INFERENCE_PRO_ROUTE_HEADER_VALUE", "true")
|
| 109 |
|
| 110 |
-
default_model_fallback = str(primary.get("id") or "
|
| 111 |
env_model_id = os.getenv("INFERENCE_MODEL_ID", "").strip()
|
| 112 |
self.default_model = env_model_id or default_model_fallback
|
| 113 |
|
|
@@ -177,16 +177,16 @@ class InferenceClient:
|
|
| 177 |
int(os.getenv("INFERENCE_INTERACTIVE_MAX_FALLBACK_DEPTH", "1")),
|
| 178 |
)
|
| 179 |
|
| 180 |
-
# Default task-to-model routing (
|
| 181 |
self.task_model_map: Dict[str, str] = {
|
| 182 |
-
"chat": "
|
| 183 |
"verify_solution": "Qwen/Qwen2.5-7B-Instruct",
|
| 184 |
"lesson_generation": "Qwen/Qwen2.5-7B-Instruct",
|
| 185 |
"quiz_generation": "Qwen/Qwen2.5-7B-Instruct",
|
| 186 |
"learning_path": "Qwen/Qwen2.5-7B-Instruct",
|
| 187 |
-
"daily_insight":
|
| 188 |
-
"risk_classification":
|
| 189 |
-
"risk_narrative":
|
| 190 |
}
|
| 191 |
# Fallback chains (only to other HF-supported models, no featherless-ai)
|
| 192 |
self.task_fallback_model_map: Dict[str, List[str]] = {
|
|
|
|
| 107 |
self.pro_route_header_name = os.getenv("INFERENCE_PRO_ROUTE_HEADER_NAME", "")
|
| 108 |
self.pro_route_header_value = os.getenv("INFERENCE_PRO_ROUTE_HEADER_VALUE", "true")
|
| 109 |
|
| 110 |
+
default_model_fallback = str(primary.get("id") or "Qwen/Qwen2.5-7B-Instruct")
|
| 111 |
env_model_id = os.getenv("INFERENCE_MODEL_ID", "").strip()
|
| 112 |
self.default_model = env_model_id or default_model_fallback
|
| 113 |
|
|
|
|
| 177 |
int(os.getenv("INFERENCE_INTERACTIVE_MAX_FALLBACK_DEPTH", "1")),
|
| 178 |
)
|
| 179 |
|
| 180 |
+
# Default task-to-model routing (global default set to Qwen2.5-7B)
|
| 181 |
self.task_model_map: Dict[str, str] = {
|
| 182 |
+
"chat": "Qwen/Qwen2.5-7B-Instruct",
|
| 183 |
"verify_solution": "Qwen/Qwen2.5-7B-Instruct",
|
| 184 |
"lesson_generation": "Qwen/Qwen2.5-7B-Instruct",
|
| 185 |
"quiz_generation": "Qwen/Qwen2.5-7B-Instruct",
|
| 186 |
"learning_path": "Qwen/Qwen2.5-7B-Instruct",
|
| 187 |
+
"daily_insight": "Qwen/Qwen2.5-7B-Instruct",
|
| 188 |
+
"risk_classification": "Qwen/Qwen2.5-7B-Instruct",
|
| 189 |
+
"risk_narrative": "Qwen/Qwen2.5-7B-Instruct",
|
| 190 |
}
|
| 191 |
# Fallback chains (only to other HF-supported models, no featherless-ai)
|
| 192 |
self.task_fallback_model_map: Dict[str, List[str]] = {
|