Spaces:
Running
Running
Optimize auto routing: Zen unlimited, smarter fallback skipping
Browse files
- Zen provider gets a 9999 req/min scoped limiter (no rate-limit wait)
- Silent skip for blocked NIM providers (no failure penalty)
- Zen blocked check uses debug instead of warning level
- Minor ruff fixes (quoted types, ternary style)
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
- api/services.py +3 -3
- providers/rate_limit.py +8 -4
api/services.py
CHANGED
|
@@ -251,11 +251,11 @@ class ClaudeProxyService:
|
|
| 251 |
|
| 252 |
limiter = GlobalRateLimiter.get_scoped_instance(resolved.provider_id)
|
| 253 |
if limiter.is_blocked() and resolved.provider_id != "zen":
|
| 254 |
-
|
| 255 |
-
|
|
|
|
| 256 |
resolved.provider_id,
|
| 257 |
)
|
| 258 |
-
last_exc = Exception("Rate limited")
|
| 259 |
continue
|
| 260 |
|
| 261 |
# Check model health (recent failures)
|
|
|
|
| 251 |
|
| 252 |
limiter = GlobalRateLimiter.get_scoped_instance(resolved.provider_id)
|
| 253 |
if limiter.is_blocked() and resolved.provider_id != "zen":
|
| 254 |
+
# Silently skip — no failure penalty for temporary rate limit
|
| 255 |
+
logger.debug(
|
| 256 |
+
"Skipping blocked provider '{}' (no penalty)",
|
| 257 |
resolved.provider_id,
|
| 258 |
)
|
|
|
|
| 259 |
continue
|
| 260 |
|
| 261 |
# Check model health (recent failures)
|
providers/rate_limit.py
CHANGED
|
@@ -19,7 +19,7 @@ T = TypeVar("T")
|
|
| 19 |
class ModelHealthTracker:
|
| 20 |
"""Track per-model health based on recent failures."""
|
| 21 |
|
| 22 |
-
_instance: ClassVar["ModelHealthTracker | None"] = None
|
| 23 |
|
| 24 |
def __init__(self, failure_ttl: float = 30.0, max_failures: int = 3) -> None:
|
| 25 |
self._failure_ttl = failure_ttl
|
|
@@ -27,7 +27,7 @@ class ModelHealthTracker:
|
|
| 27 |
self._failures: dict[str, list[float]] = {}
|
| 28 |
|
| 29 |
@classmethod
|
| 30 |
-
def get_instance(cls) -> "ModelHealthTracker":
|
| 31 |
if cls._instance is None:
|
| 32 |
cls._instance = cls()
|
| 33 |
return cls._instance
|
|
@@ -149,10 +149,14 @@ class GlobalRateLimiter:
|
|
| 149 |
rate_window: float | None = None,
|
| 150 |
max_concurrency: int = 5,
|
| 151 |
) -> GlobalRateLimiter:
|
| 152 |
-
"""Get or create a provider-scoped limiter instance."""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
if not scope:
|
| 154 |
raise ValueError("scope must be non-empty")
|
| 155 |
-
desired_rate_limit = rate_limit or 40
|
| 156 |
desired_rate_window = float(rate_window or 60.0)
|
| 157 |
existing = cls._scoped_instances.get(scope)
|
| 158 |
if existing and existing.matches_config(
|
|
|
|
| 19 |
class ModelHealthTracker:
|
| 20 |
"""Track per-model health based on recent failures."""
|
| 21 |
|
| 22 |
+
_instance: ClassVar[ModelHealthTracker | None] = None
|
| 23 |
|
| 24 |
def __init__(self, failure_ttl: float = 30.0, max_failures: int = 3) -> None:
|
| 25 |
self._failure_ttl = failure_ttl
|
|
|
|
| 27 |
self._failures: dict[str, list[float]] = {}
|
| 28 |
|
| 29 |
@classmethod
|
| 30 |
+
def get_instance(cls) -> ModelHealthTracker:
|
| 31 |
if cls._instance is None:
|
| 32 |
cls._instance = cls()
|
| 33 |
return cls._instance
|
|
|
|
| 149 |
rate_window: float | None = None,
|
| 150 |
max_concurrency: int = 5,
|
| 151 |
) -> GlobalRateLimiter:
|
| 152 |
+
"""Get or create a provider-scoped limiter instance.
|
| 153 |
+
|
| 154 |
+
Zen gets unlimited rate (9999) since it has no rate limits.
|
| 155 |
+
NIM and others use the configured or default 40 req/min.
|
| 156 |
+
"""
|
| 157 |
if not scope:
|
| 158 |
raise ValueError("scope must be non-empty")
|
| 159 |
+
desired_rate_limit = 9999 if scope == "zen" else rate_limit or 40
|
| 160 |
desired_rate_window = float(rate_window or 60.0)
|
| 161 |
existing = cls._scoped_instances.get(scope)
|
| 162 |
if existing and existing.matches_config(
|