# Inference provider selection
# CI trigger marker: touching this file forces a backend deploy workflow run when needed.
INFERENCE_PROVIDER=hf_inference
INFERENCE_PRO_ENABLED=true
INFERENCE_PRO_PROVIDER=hf_inference
INFERENCE_GPU_PROVIDER=hf_inference
INFERENCE_CPU_PROVIDER=hf_inference
INFERENCE_ENABLE_PROVIDER_FALLBACK=true
INFERENCE_PRO_PRIORITY_TASKS=chat,verify_solution
INFERENCE_PRO_ROUTE_HEADER_NAME=
INFERENCE_PRO_ROUTE_HEADER_VALUE=true
# task policy sets, comma-separated
INFERENCE_GPU_REQUIRED_TASKS=chat
INFERENCE_CPU_ONLY_TASKS=risk_classification,analytics_aggregation,file_parsing,auth,default_cpu
INFERENCE_INTERACTIVE_TASKS=chat,verify_solution,daily_insight
ENABLE_LLM_RISK_RECOMMENDATIONS=true
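The three policy sets above are comma-separated task names that steer provider routing. A minimal sketch of how a backend could resolve a provider from them; the helper names and precedence order are illustrative assumptions, not the project's actual routing code:

```python
import os

def _csv(name: str) -> set[str]:
    # Parse a comma-separated env var into a set of task names.
    return {t.strip() for t in os.getenv(name, "").split(",") if t.strip()}

def resolve_provider(task: str) -> str:
    # Assumed precedence: pro-priority tasks, then GPU-required, then CPU-only,
    # then the global default provider.
    pro_enabled = os.getenv("INFERENCE_PRO_ENABLED", "false").lower() == "true"
    if pro_enabled and task in _csv("INFERENCE_PRO_PRIORITY_TASKS"):
        return os.getenv("INFERENCE_PRO_PROVIDER", "hf_inference")
    if task in _csv("INFERENCE_GPU_REQUIRED_TASKS"):
        return os.getenv("INFERENCE_GPU_PROVIDER", "hf_inference")
    if task in _csv("INFERENCE_CPU_ONLY_TASKS"):
        return os.getenv("INFERENCE_CPU_PROVIDER", "hf_inference")
    return os.getenv("INFERENCE_PROVIDER", "hf_inference")

print(resolve_provider("chat"))  # -> hf_inference with the values above
```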
# local_space provider settings
# Accepts either runtime host (https://<owner>-<space>.hf.space) or
# Space page URL (https://huggingface.co/spaces/<owner>/<space>).
# Example: https://huggingface.co/spaces/Deign86/mathpulse-ai
INFERENCE_LOCAL_SPACE_URL=http://127.0.0.1:7860
INFERENCE_LOCAL_SPACE_GENERATE_PATH=/gradio_api/call/generate
INFERENCE_LOCAL_SPACE_TIMEOUT_SEC=180
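Since INFERENCE_LOCAL_SPACE_URL accepts either form, the backend presumably normalizes a Space page URL into its runtime host before calling the Gradio endpoint. A sketch of that normalization, assuming the <owner>-<space>.hf.space convention noted above (how the real subdomain handles case and special characters is an assumption):

```python
from urllib.parse import urlparse

def normalize_space_url(url: str) -> str:
    # Convert a Space page URL into the runtime host; pass runtime hosts through.
    parsed = urlparse(url)
    if parsed.netloc == "huggingface.co" and parsed.path.startswith("/spaces/"):
        owner, space = parsed.path.removeprefix("/spaces/").strip("/").split("/", 1)
        # Lowercasing is an assumption about how Space subdomains are formed.
        return f"https://{owner}-{space}.hf.space".lower()
    return url.rstrip("/")

print(normalize_space_url("https://huggingface.co/spaces/Deign86/mathpulse-ai"))
# -> https://deign86-mathpulse-ai.hf.space
```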
# hf_inference provider settings
# Alternative env names accepted by runtime/startup checks: HUGGING_FACE_API_TOKEN, HUGGINGFACE_API_TOKEN
HF_TOKEN=your_hf_token
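The runtime accepts the token under several names; a small sketch of the lookup (the order of precedence is an assumption):

```python
import os

def resolve_hf_token() -> str | None:
    # Return the first non-empty token among the accepted env names.
    for name in ("HF_TOKEN", "HUGGING_FACE_API_TOKEN", "HUGGINGFACE_API_TOKEN"):
        value = os.getenv(name)
        if value:
            return value
    return None
```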
FIREBASE_AUTH_PROJECT_ID=mathpulse-ai-2026
# Prefer one of the options below for backend Firestore/Admin access in deployment:
# FIREBASE_SERVICE_ACCOUNT_JSON={"type":"service_account",...}
# FIREBASE_SERVICE_ACCOUNT_FILE=/path/to/service-account.json
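A sketch of how the two service-account options could be consumed with the firebase-admin SDK; the function name and the Application Default Credentials fallback are assumptions, not the backend's actual initialization code:

```python
import json
import os

import firebase_admin
from firebase_admin import credentials

def init_firebase() -> firebase_admin.App:
    raw_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
    file_path = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
    if raw_json:
        # Inline JSON takes priority (assumption).
        cred = credentials.Certificate(json.loads(raw_json))
    elif file_path:
        cred = credentials.Certificate(file_path)
    else:
        # Fall back to Application Default Credentials if neither is set.
        cred = credentials.ApplicationDefault()
    return firebase_admin.initialize_app(
        cred, {"projectId": os.getenv("FIREBASE_AUTH_PROJECT_ID")}
    )
```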
INFERENCE_HF_BASE_URL=https://router.huggingface.co/hf-inference/models
INFERENCE_HF_CHAT_URL=https://router.huggingface.co/v1/chat/completions
INFERENCE_HF_TIMEOUT_SEC=90
INFERENCE_INTERACTIVE_TIMEOUT_SEC=55
INFERENCE_BACKGROUND_TIMEOUT_SEC=120
# Curriculum PDF storage
# Store the binary curriculum files in a Hugging Face dataset or Space repo,
# then point the backend at that repo so it downloads them at build/startup time.
CURRICULUM_SOURCE_REPO_ID=Deign86/mathpulse-curriculum
CURRICULUM_SOURCE_REPO_TYPE=dataset
CURRICULUM_SOURCE_REVISION=main
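A sketch of the startup download using huggingface_hub's snapshot_download with the three variables above; the target directory and the reuse of HF_TOKEN are assumptions:

```python
import os

from huggingface_hub import snapshot_download

def fetch_curriculum(target_dir: str = "./curriculum") -> str:
    # Pull the curriculum repo locally; returns the download path.
    return snapshot_download(
        repo_id=os.environ["CURRICULUM_SOURCE_REPO_ID"],
        repo_type=os.getenv("CURRICULUM_SOURCE_REPO_TYPE", "dataset"),
        revision=os.getenv("CURRICULUM_SOURCE_REVISION", "main"),
        local_dir=target_dir,
        token=os.getenv("HF_TOKEN"),
    )
```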
# Transactional email settings for admin-created accounts
# Primary provider: Brevo Transactional API
BREVO_API_KEY=
# Optional: Brevo MCP token (base64 JSON containing api_key) if BREVO_API_KEY is not set
BREVO_MCP_TOKEN=
# Optional SMTP fallback provider (Brevo SMTP relay)
BREVO_SMTP_LOGIN=
BREVO_SMTP_KEY=
BREVO_SMTP_HOST=smtp-relay.brevo.com
BREVO_SMTP_PORT=587
MAIL_FROM_ADDRESS=noreply@mathpulse.ai
MAIL_FROM_NAME=MathPulse AI
MAIL_SEND_TIMEOUT_SEC=15
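Provider selection for outgoing mail follows from these variables: a Brevo API key, otherwise an MCP token that wraps one, otherwise SMTP relay credentials. A sketch of that resolution, with the exact precedence assumed from the comments above:

```python
import base64
import json
import os

def resolve_brevo_api_key() -> str | None:
    # Direct API key wins; otherwise try the base64-encoded MCP token.
    if os.getenv("BREVO_API_KEY"):
        return os.environ["BREVO_API_KEY"]
    mcp_token = os.getenv("BREVO_MCP_TOKEN")
    if mcp_token:
        try:
            payload = json.loads(base64.b64decode(mcp_token))
            return payload.get("api_key")
        except ValueError:
            return None
    return None

def smtp_fallback_available() -> bool:
    # SMTP relay is only usable if both login and key are configured.
    return bool(os.getenv("BREVO_SMTP_LOGIN") and os.getenv("BREVO_SMTP_KEY"))
```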
APP_LOGIN_URL=https://mathpulse.ai
# Optional: absolute http(s) URL used as the email header avatar image.
# If unset, backend derives this from APP_LOGIN_URL + /avatar/avatar_icon.png.
APP_BRAND_AVATAR_URL=
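The avatar derivation described above is a one-liner; a sketch for completeness:

```python
import os

def brand_avatar_url() -> str:
    # Explicit override wins; otherwise derive from the login URL as documented.
    explicit = os.getenv("APP_BRAND_AVATAR_URL")
    if explicit:
        return explicit
    base = os.getenv("APP_LOGIN_URL", "https://mathpulse.ai").rstrip("/")
    return f"{base}/avatar/avatar_icon.png"
```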
# model defaults
# Global default model for all tasks.
INFERENCE_MODEL_ID=Qwen/Qwen3-32B
INFERENCE_ENFORCE_QWEN_ONLY=true
INFERENCE_QWEN_LOCK_MODEL=Qwen/Qwen3-32B
INFERENCE_MAX_NEW_TOKENS=8192
INFERENCE_TEMPERATURE=0.2
INFERENCE_TOP_P=0.9
INFERENCE_CHAT_MODEL_ID=Qwen/Qwen3-32B
# Temporary chat-only override for experiments (clear to roll back instantly).
# Example: Qwen/Qwen3-32B
INFERENCE_CHAT_MODEL_TEMP_OVERRIDE=
INFERENCE_CHAT_STRICT_MODEL_ONLY=true
INFERENCE_CHAT_HARD_MODEL_ID=meta-llama/Meta-Llama-3-70B-Instruct
INFERENCE_CHAT_HARD_TRIGGER_ENABLED=false
INFERENCE_CHAT_HARD_PROMPT_CHARS=650
INFERENCE_CHAT_HARD_HISTORY_CHARS=1500
INFERENCE_CHAT_HARD_KEYWORDS=step-by-step,show all steps,explain each step,justify each step,derive,derivation,proof,prove,rigorous,multi-step,word problem
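The HARD_* knobs describe a trigger that can escalate a chat request to the larger hard model. A sketch of one plausible reading, where a long prompt, long history, or a keyword hit escalates; the actual combination logic, and how it interacts with the Qwen-only enforcement flags, is not specified in this file:

```python
import os

def select_chat_model(prompt: str, history_text: str) -> str:
    default_model = os.getenv("INFERENCE_CHAT_MODEL_ID", "Qwen/Qwen3-32B")
    if os.getenv("INFERENCE_CHAT_HARD_TRIGGER_ENABLED", "false").lower() != "true":
        return default_model

    keywords = [k.strip().lower() for k in
                os.getenv("INFERENCE_CHAT_HARD_KEYWORDS", "").split(",") if k.strip()]
    long_prompt = len(prompt) >= int(os.getenv("INFERENCE_CHAT_HARD_PROMPT_CHARS", "650"))
    long_history = len(history_text) >= int(os.getenv("INFERENCE_CHAT_HARD_HISTORY_CHARS", "1500"))
    keyword_hit = any(k in prompt.lower() for k in keywords)

    # OR-combination of the three signals is an assumption.
    if long_prompt or long_history or keyword_hit:
        return os.getenv("INFERENCE_CHAT_HARD_MODEL_ID", default_model)
    return default_model
```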
CHAT_MAX_NEW_TOKENS=8192
CHAT_STREAM_NO_TOKEN_TIMEOUT_SEC=90
CHAT_STREAM_TOTAL_TIMEOUT_SEC=900
CHAT_STREAM_CONTINUATION_ENABLED=true
CHAT_STREAM_CONTINUATION_MAX_ROUNDS=2
CHAT_STREAM_CONTINUATION_MIN_NEW_CHARS=24
CHAT_STREAM_CONTINUATION_TAIL_CHARS=900
CHAT_STREAM_COMPLETION_MODE_DEFAULT=auto
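The CHAT_STREAM_CONTINUATION_* values suggest a loop that re-prompts with the tail of a truncated reply. A sketch under that assumption; `stream_once` stands in for the real streaming call, and the heuristic for detecting truncation is omitted:

```python
import os
from typing import Callable

def stream_with_continuation(prompt: str, stream_once: Callable[[str], str]) -> str:
    text = stream_once(prompt)
    if os.getenv("CHAT_STREAM_CONTINUATION_ENABLED", "false").lower() != "true":
        return text

    max_rounds = int(os.getenv("CHAT_STREAM_CONTINUATION_MAX_ROUNDS", "2"))
    min_new = int(os.getenv("CHAT_STREAM_CONTINUATION_MIN_NEW_CHARS", "24"))
    tail_chars = int(os.getenv("CHAT_STREAM_CONTINUATION_TAIL_CHARS", "900"))

    for _ in range(max_rounds):
        # Re-prompt with the last TAIL_CHARS; stop once a round adds too little.
        continuation = stream_once("Continue from: " + text[-tail_chars:])
        if len(continuation) < min_new:
            break
        text += continuation
    return text
```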
# Optional: force quiz-generation model. Leave empty to use routing.task_model_map.quiz_generation.
HF_QUIZ_MODEL_ID=
HF_QUIZ_JSON_REPAIR_MODEL_ID=Qwen/Qwen3-32B
# retry behavior
INFERENCE_MAX_RETRIES=3
INFERENCE_BACKOFF_SEC=1.5
INFERENCE_INTERACTIVE_MAX_RETRIES=1
INFERENCE_BACKGROUND_MAX_RETRIES=3
INFERENCE_INTERACTIVE_BACKOFF_SEC=1.0
INFERENCE_BACKGROUND_BACKOFF_SEC=1.75
INFERENCE_INTERACTIVE_MAX_FALLBACK_DEPTH=1
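Interactive and background tasks get separate retry budgets and backoff bases. A sketch of how those values could drive a retry wrapper; linear backoff and broad exception handling are assumptions, the real code may use exponential backoff and narrower error types:

```python
import os
import time
from typing import Callable, TypeVar

T = TypeVar("T")

def call_with_retries(fn: Callable[[], T], interactive: bool) -> T:
    prefix = "INTERACTIVE" if interactive else "BACKGROUND"
    retries = int(os.getenv(f"INFERENCE_{prefix}_MAX_RETRIES",
                            os.getenv("INFERENCE_MAX_RETRIES", "3")))
    backoff = float(os.getenv(f"INFERENCE_{prefix}_BACKOFF_SEC",
                              os.getenv("INFERENCE_BACKOFF_SEC", "1.5")))
    last_exc: Exception | None = None
    for attempt in range(retries + 1):
        try:
            return fn()
        except Exception as exc:
            last_exc = exc
            if attempt < retries:
                time.sleep(backoff * (attempt + 1))  # linear backoff (assumption)
    raise last_exc  # all attempts failed
```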
# Max simultaneous blocking HF calls allowed from async endpoints.
HF_BLOCKING_CALL_CONCURRENCY=16
HF_ASYNC_MAX_CONNECTIONS=64
HF_ASYNC_MAX_KEEPALIVE_CONNECTIONS=32
HF_ASYNC_CONNECT_TIMEOUT_SEC=10.0
HF_ASYNC_WRITE_TIMEOUT_SEC=30.0
HF_ASYNC_POOL_TIMEOUT_SEC=10.0
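These limits map naturally onto an httpx async client plus a semaphore bounding blocking calls from async endpoints; a sketch under that assumption (the backend may use a different HTTP stack):

```python
import asyncio
import os

import httpx

# Caps simultaneous blocking HF calls issued from async endpoints.
blocking_hf_calls = asyncio.Semaphore(int(os.getenv("HF_BLOCKING_CALL_CONCURRENCY", "16")))

def build_async_client() -> httpx.AsyncClient:
    limits = httpx.Limits(
        max_connections=int(os.getenv("HF_ASYNC_MAX_CONNECTIONS", "64")),
        max_keepalive_connections=int(os.getenv("HF_ASYNC_MAX_KEEPALIVE_CONNECTIONS", "32")),
    )
    timeout = httpx.Timeout(
        connect=float(os.getenv("HF_ASYNC_CONNECT_TIMEOUT_SEC", "10.0")),
        write=float(os.getenv("HF_ASYNC_WRITE_TIMEOUT_SEC", "30.0")),
        pool=float(os.getenv("HF_ASYNC_POOL_TIMEOUT_SEC", "10.0")),
        # Reusing the HF request timeout as the read timeout is an assumption.
        read=float(os.getenv("INFERENCE_HF_TIMEOUT_SEC", "90")),
    )
    return httpx.AsyncClient(limits=limits, timeout=timeout)
```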
# fallback model ids, comma-separated
INFERENCE_FALLBACK_MODELS=
# async generation controls
ENABLE_ASYNC_GENERATION=true
ASYNC_TASK_TTL_SECONDS=3600
ASYNC_TASK_MAX_ITEMS=400
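ASYNC_TASK_TTL_SECONDS and ASYNC_TASK_MAX_ITEMS imply a bounded task registry with expiry. A purely illustrative in-memory sketch; the backend's actual store may differ:

```python
import os
import time

class AsyncTaskStore:
    def __init__(self) -> None:
        self.ttl = int(os.getenv("ASYNC_TASK_TTL_SECONDS", "3600"))
        self.max_items = int(os.getenv("ASYNC_TASK_MAX_ITEMS", "400"))
        self._items: dict[str, tuple[float, dict]] = {}

    def put(self, task_id: str, payload: dict) -> None:
        self._prune()
        self._items[task_id] = (time.time(), payload)

    def get(self, task_id: str) -> dict | None:
        entry = self._items.get(task_id)
        if entry and time.time() - entry[0] <= self.ttl:
            return entry[1]
        self._items.pop(task_id, None)
        return None

    def _prune(self) -> None:
        # Drop expired entries, then evict oldest entries down to the item cap.
        now = time.time()
        for key in [k for k, (ts, _) in self._items.items() if now - ts > self.ttl]:
            del self._items[key]
        while len(self._items) >= self.max_items:
            oldest = min(self._items, key=lambda k: self._items[k][0])
            del self._items[oldest]
```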