Yash030 Claude Opus 4.7 committed on
Commit
9358a6f
·
1 Parent(s): 948c8f9

Optimize auto routing: Zen unlimited, smarter fallback skipping

Browse files

- Zen provider gets 9999 req/min scoped limiter (no rate limit wait)
- Silent skip for blocked non-Zen providers such as NIM (no failure penalty)
- Blocked-provider skip is logged at debug instead of warning level
- Minor ruff fixes (quoted types, ternary style)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

Files changed (2) hide show
  1. api/services.py +3 -3
  2. providers/rate_limit.py +8 -4
api/services.py CHANGED
@@ -251,11 +251,11 @@ class ClaudeProxyService:
251
 
252
  limiter = GlobalRateLimiter.get_scoped_instance(resolved.provider_id)
253
  if limiter.is_blocked() and resolved.provider_id != "zen":
254
- logger.warning(
255
- "Provider '{}' is currently rate limited, skipping to next candidate...",
 
256
  resolved.provider_id,
257
  )
258
- last_exc = Exception("Rate limited")
259
  continue
260
 
261
  # Check model health (recent failures)
 
251
 
252
  limiter = GlobalRateLimiter.get_scoped_instance(resolved.provider_id)
253
  if limiter.is_blocked() and resolved.provider_id != "zen":
254
+ # Silently skip — no failure penalty for temporary rate limit
255
+ logger.debug(
256
+ "Skipping blocked provider '{}' (no penalty)",
257
  resolved.provider_id,
258
  )
 
259
  continue
260
 
261
  # Check model health (recent failures)
providers/rate_limit.py CHANGED
@@ -19,7 +19,7 @@ T = TypeVar("T")
19
  class ModelHealthTracker:
20
  """Track per-model health based on recent failures."""
21
 
22
- _instance: ClassVar["ModelHealthTracker | None"] = None
23
 
24
  def __init__(self, failure_ttl: float = 30.0, max_failures: int = 3) -> None:
25
  self._failure_ttl = failure_ttl
@@ -27,7 +27,7 @@ class ModelHealthTracker:
27
  self._failures: dict[str, list[float]] = {}
28
 
29
  @classmethod
30
- def get_instance(cls) -> "ModelHealthTracker":
31
  if cls._instance is None:
32
  cls._instance = cls()
33
  return cls._instance
@@ -149,10 +149,14 @@ class GlobalRateLimiter:
149
  rate_window: float | None = None,
150
  max_concurrency: int = 5,
151
  ) -> GlobalRateLimiter:
152
- """Get or create a provider-scoped limiter instance."""
 
 
 
 
153
  if not scope:
154
  raise ValueError("scope must be non-empty")
155
- desired_rate_limit = rate_limit or 40
156
  desired_rate_window = float(rate_window or 60.0)
157
  existing = cls._scoped_instances.get(scope)
158
  if existing and existing.matches_config(
 
19
  class ModelHealthTracker:
20
  """Track per-model health based on recent failures."""
21
 
22
+ _instance: ClassVar[ModelHealthTracker | None] = None
23
 
24
  def __init__(self, failure_ttl: float = 30.0, max_failures: int = 3) -> None:
25
  self._failure_ttl = failure_ttl
 
27
  self._failures: dict[str, list[float]] = {}
28
 
29
  @classmethod
30
+ def get_instance(cls) -> ModelHealthTracker:
31
  if cls._instance is None:
32
  cls._instance = cls()
33
  return cls._instance
 
149
  rate_window: float | None = None,
150
  max_concurrency: int = 5,
151
  ) -> GlobalRateLimiter:
152
+ """Get or create a provider-scoped limiter instance.
153
+
154
+ Zen gets unlimited rate (9999) since it has no rate limits.
155
+ NIM and others use the configured or default 40 req/min.
156
+ """
157
  if not scope:
158
  raise ValueError("scope must be non-empty")
159
+ desired_rate_limit = 9999 if scope == "zen" else rate_limit or 40
160
  desired_rate_window = float(rate_window or 60.0)
161
  existing = cls._scoped_instances.get(scope)
162
  if existing and existing.matches_config(