Yash030 Claude Opus 4.7 committed on
Commit
43ea069
·
1 Parent(s): 6339a53

Add Cerebras and Silicon Flow provider support

Browse files

- Cerebras: qwen/qwen-3-235b-a22b-instruct-2507 (Qwen 3 235B)
- Silicon Flow: Qwen3.6-35B/27B, Qwen3.5-35B/27B (VLM multimodal),
Gemma-4-26B/31B reasoning models
- Both use OpenAI-compatible /chat/completions via OpenAIChatTransport
- API keys: CEREBRAS_API_KEY, SILICON_API_KEY

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

api/services.py CHANGED
@@ -38,7 +38,7 @@ TokenCounter = Callable[[list[Any], str | list[Any] | None, list[Any] | None], i
38
  ProviderGetter = Callable[[str], BaseProvider]
39
 
40
  # Providers that use ``/chat/completions`` + Anthropic-to-OpenAI conversion (not native Messages).
41
- _OPENAI_CHAT_UPSTREAM_IDS = frozenset({"nvidia_nim", "groq", "cerebras"})
42
 
43
 
44
  def anthropic_sse_streaming_response(
 
38
  ProviderGetter = Callable[[str], BaseProvider]
39
 
40
  # Providers that use ``/chat/completions`` + Anthropic-to-OpenAI conversion (not native Messages).
41
+ _OPENAI_CHAT_UPSTREAM_IDS = frozenset({"nvidia_nim", "groq", "cerebras", "silicon"})
42
 
43
 
44
  def anthropic_sse_streaming_response(
config/provider_catalog.py CHANGED
@@ -14,6 +14,8 @@ TransportType = Literal["openai_chat", "anthropic_messages"]
14
  # Default upstream base URLs (also re-exported via :mod:`providers.defaults`)
15
  NVIDIA_NIM_DEFAULT_BASE = "https://integrate.api.nvidia.com/v1"
16
  ZEN_DEFAULT_BASE = "https://opencode.ai/zen"
 
 
17
 
18
 
19
  @dataclass(frozen=True, slots=True)
@@ -53,10 +55,28 @@ PROVIDER_CATALOG: dict[str, ProviderDescriptor] = {
53
  base_url_attr="zen_base_url",
54
  capabilities=("chat", "streaming", "tools", "thinking"),
55
  ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  }
57
 
58
  # Order matches docs; must match PROVIDER_CATALOG keys.
59
- SUPPORTED_PROVIDER_IDS: tuple[str, ...] = ("nvidia_nim", "zen")
60
 
61
  if len(set(SUPPORTED_PROVIDER_IDS)) != len(SUPPORTED_PROVIDER_IDS):
62
  raise AssertionError("Duplicate provider ids in PROVIDER_CATALOG key order")
 
14
  # Default upstream base URLs (also re-exported via :mod:`providers.defaults`)
15
  NVIDIA_NIM_DEFAULT_BASE = "https://integrate.api.nvidia.com/v1"
16
  ZEN_DEFAULT_BASE = "https://opencode.ai/zen"
17
+ CEREBRAS_DEFAULT_BASE = "https://api.cerebras.ai/v1"
18
+ SILICON_DEFAULT_BASE = "https://api.siliconflow.cn/v1"
19
 
20
 
21
  @dataclass(frozen=True, slots=True)
 
55
  base_url_attr="zen_base_url",
56
  capabilities=("chat", "streaming", "tools", "thinking"),
57
  ),
58
+ "cerebras": ProviderDescriptor(
59
+ provider_id="cerebras",
60
+ transport_type="openai_chat",
61
+ credential_env="CEREBRAS_API_KEY",
62
+ credential_url="https://cerebras.ai/labs",
63
+ credential_attr="cerebras_api_key",
64
+ default_base_url=CEREBRAS_DEFAULT_BASE,
65
+ capabilities=("chat", "streaming", "tools", "thinking"),
66
+ ),
67
+ "silicon": ProviderDescriptor(
68
+ provider_id="silicon",
69
+ transport_type="openai_chat",
70
+ credential_env="SILICON_API_KEY",
71
+ credential_url="https://siliconflow.cn",
72
+ credential_attr="silicon_api_key",
73
+ default_base_url=SILICON_DEFAULT_BASE,
74
+ capabilities=("chat", "streaming", "tools", "thinking"),
75
+ ),
76
  }
77
 
78
# Canonical provider ordering. Order matches docs; must match PROVIDER_CATALOG keys.
SUPPORTED_PROVIDER_IDS: tuple[str, ...] = ("nvidia_nim", "zen", "cerebras", "silicon")

# Import-time sanity check: fail fast if a provider id was listed twice
# (e.g. after a copy-paste when registering a new provider).
if len(set(SUPPORTED_PROVIDER_IDS)) != len(SUPPORTED_PROVIDER_IDS):
    raise AssertionError("Duplicate provider ids in PROVIDER_CATALOG key order")
config/settings.py CHANGED
@@ -145,6 +145,10 @@ class Settings(BaseSettings):
145
  session_retention_minutes: int = Field(
146
  default=30, validation_alias="SESSION_RETENTION_MINUTES"
147
  )
 
 
 
 
148
  zen_base_url: str = Field(
149
  default="https://opencode.ai/zen", validation_alias="ZEN_BASE_URL"
150
  )
@@ -535,6 +539,10 @@ class Settings(BaseSettings):
535
  )
536
  if provider_id == "zen":
537
  return bool(self.zen_api_key.strip())
 
 
 
 
538
  # conservative default: assume not configured
539
  return False
540
 
 
145
  session_retention_minutes: int = Field(
146
  default=30, validation_alias="SESSION_RETENTION_MINUTES"
147
  )
148
+ # ==================== Cerebras Config ====================
149
+ cerebras_api_key: str = Field(default="", validation_alias="CEREBRAS_API_KEY")
150
+ # ==================== Silicon Flow Config ====================
151
+ silicon_api_key: str = Field(default="", validation_alias="SILICON_API_KEY")
152
  zen_base_url: str = Field(
153
  default="https://opencode.ai/zen", validation_alias="ZEN_BASE_URL"
154
  )
 
539
  )
540
  if provider_id == "zen":
541
  return bool(self.zen_api_key.strip())
542
+ if provider_id == "cerebras":
543
+ return bool(self.cerebras_api_key.strip())
544
+ if provider_id == "silicon":
545
+ return bool(self.silicon_api_key.strip())
546
  # conservative default: assume not configured
547
  return False
548
 
core/model_capabilities.py CHANGED
@@ -131,6 +131,101 @@ MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {
131
  speed="medium",
132
  priority=60,
133
  ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  }
135
 
136
 
 
131
  speed="medium",
132
  priority=60,
133
  ),
134
+ # Cerebras models
135
+ "cerebras/qwen/qwen-3-235b-a22b-instruct-2507": ModelCapabilities(
136
+ provider_id="cerebras",
137
+ model_id="qwen/qwen-3-235b-a22b-instruct-2507",
138
+ model_ref="cerebras/qwen/qwen-3-235b-a22b-instruct-2507",
139
+ coding=True,
140
+ reasoning=True,
141
+ general_text=True,
142
+ max_tokens=32000,
143
+ speed="slow",
144
+ priority=85,
145
+ ),
146
+ # Silicon Flow models
147
+ "silicon/Qwen/Qwen3.6-35B-A3B": ModelCapabilities(
148
+ provider_id="silicon",
149
+ model_id="Qwen/Qwen3.6-35B-A3B",
150
+ model_ref="silicon/Qwen/Qwen3.6-35B-A3B",
151
+ vision=True,
152
+ supports_base64_images=True,
153
+ max_images=1,
154
+ multimodal_input=True,
155
+ coding=True,
156
+ reasoning=True,
157
+ general_text=True,
158
+ max_tokens=262144,
159
+ speed="medium",
160
+ priority=85,
161
+ ),
162
+ "silicon/Qwen/Qwen3.6-27B": ModelCapabilities(
163
+ provider_id="silicon",
164
+ model_id="Qwen/Qwen3.6-27B",
165
+ model_ref="silicon/Qwen/Qwen3.6-27B",
166
+ vision=True,
167
+ supports_base64_images=True,
168
+ max_images=1,
169
+ multimodal_input=True,
170
+ coding=True,
171
+ reasoning=True,
172
+ general_text=True,
173
+ max_tokens=262144,
174
+ speed="medium",
175
+ priority=82,
176
+ ),
177
+ "silicon/Qwen/Qwen3.5-35B-A3B": ModelCapabilities(
178
+ provider_id="silicon",
179
+ model_id="Qwen/Qwen3.5-35B-A3B",
180
+ model_ref="silicon/Qwen/Qwen3.5-35B-A3B",
181
+ vision=True,
182
+ supports_base64_images=True,
183
+ max_images=1,
184
+ multimodal_input=True,
185
+ coding=True,
186
+ reasoning=True,
187
+ general_text=True,
188
+ max_tokens=262144,
189
+ speed="medium",
190
+ priority=80,
191
+ ),
192
+ "silicon/Qwen/Qwen3.5-27B": ModelCapabilities(
193
+ provider_id="silicon",
194
+ model_id="Qwen/Qwen3.5-27B",
195
+ model_ref="silicon/Qwen/Qwen3.5-27B",
196
+ vision=True,
197
+ supports_base64_images=True,
198
+ max_images=1,
199
+ multimodal_input=True,
200
+ coding=True,
201
+ reasoning=True,
202
+ general_text=True,
203
+ max_tokens=262144,
204
+ speed="medium",
205
+ priority=78,
206
+ ),
207
+ "silicon/google/gemma-4-26B-A4B-it": ModelCapabilities(
208
+ provider_id="silicon",
209
+ model_id="google/gemma-4-26B-A4B-it",
210
+ model_ref="silicon/google/gemma-4-26B-A4B-it",
211
+ coding=True,
212
+ reasoning=True,
213
+ general_text=True,
214
+ max_tokens=262144,
215
+ speed="fast",
216
+ priority=75,
217
+ ),
218
+ "silicon/google/gemma-4-31B-it": ModelCapabilities(
219
+ provider_id="silicon",
220
+ model_id="google/gemma-4-31B-it",
221
+ model_ref="silicon/google/gemma-4-31B-it",
222
+ coding=True,
223
+ reasoning=True,
224
+ general_text=True,
225
+ max_tokens=262144,
226
+ speed="fast",
227
+ priority=76,
228
+ ),
229
  }
230
 
231
 
providers/cerebras/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """Cerebras provider (OpenAI-compatible /chat/completions)."""
2
+
3
+ from .client import CerebrasProvider
4
+
5
+ __all__ = ["CerebrasProvider"]
providers/cerebras/client.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Cerebras provider using OpenAI-compatible API."""
2
+
3
+ from typing import Any
4
+
5
+ from config.settings import Settings
6
+ from core.anthropic import ReasoningReplayMode, build_base_request_body
7
+ from providers.base import ProviderConfig
8
+ from providers.defaults import CEREBRAS_DEFAULT_BASE
9
+ from providers.openai_compat import OpenAIChatTransport
10
+
11
+
12
class CerebrasProvider(OpenAIChatTransport):
    """Cerebras provider backed by the OpenAI-compatible ``/chat/completions`` API."""

    def __init__(self, config: ProviderConfig, *, settings: Settings):
        # Normalise the upstream endpoint: drop trailing slashes, then make
        # sure the OpenAI-style ``/v1`` path segment is present exactly once.
        endpoint = (config.base_url or CEREBRAS_DEFAULT_BASE).rstrip("/")
        if not endpoint.endswith("/v1"):
            endpoint = f"{endpoint}/v1"
        super().__init__(
            config,
            provider_name="Cerebras",
            base_url=endpoint,
            api_key=config.api_key,
        )
        self._settings = settings

    def _build_request_body(
        self, request: Any, thinking_enabled: bool | None = None
    ) -> dict:
        """Build the upstream request payload for *request*.

        Reasoning content is replayed to the upstream only when extended
        thinking is enabled for this request; otherwise replay is disabled.
        """
        if self._is_thinking_enabled(request, thinking_enabled):
            replay = ReasoningReplayMode.REASONING_CONTENT
        else:
            replay = ReasoningReplayMode.DISABLED
        return build_base_request_body(request, reasoning_replay=replay)
providers/defaults.py CHANGED
@@ -1,11 +1,15 @@
1
  """Re-exports default upstream base URLs from the config provider catalog."""
2
 
3
  from config.provider_catalog import (
 
4
  NVIDIA_NIM_DEFAULT_BASE,
 
5
  ZEN_DEFAULT_BASE,
6
  )
7
 
8
  __all__ = (
 
9
  "NVIDIA_NIM_DEFAULT_BASE",
 
10
  "ZEN_DEFAULT_BASE",
11
  )
 
1
  """Re-exports default upstream base URLs from the config provider catalog."""
2
 
3
  from config.provider_catalog import (
4
+ CEREBRAS_DEFAULT_BASE,
5
  NVIDIA_NIM_DEFAULT_BASE,
6
+ SILICON_DEFAULT_BASE,
7
  ZEN_DEFAULT_BASE,
8
  )
9
 
10
  __all__ = (
11
+ "CEREBRAS_DEFAULT_BASE",
12
  "NVIDIA_NIM_DEFAULT_BASE",
13
+ "SILICON_DEFAULT_BASE",
14
  "ZEN_DEFAULT_BASE",
15
  )
providers/registry.py CHANGED
@@ -44,9 +44,23 @@ def _create_zen(config: ProviderConfig, settings: Settings) -> BaseProvider:
44
  return ZenProvider(config, settings=settings)
45
 
46
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  PROVIDER_FACTORIES: dict[str, ProviderFactory] = {
48
  "nvidia_nim": _create_nvidia_nim,
49
  "zen": _create_zen,
 
 
50
  }
51
 
52
  if set(PROVIDER_DESCRIPTORS) != set(SUPPORTED_PROVIDER_IDS) or set(
 
44
  return ZenProvider(config, settings=settings)
45
 
46
 
47
def _create_cerebras(config: ProviderConfig, settings: Settings) -> BaseProvider:
    """Factory: build a Cerebras provider instance from *config* and *settings*."""
    # Deferred import — presumably so the provider module is only loaded when
    # this factory actually runs (matches the style of the sibling factories).
    from providers.cerebras import CerebrasProvider

    return CerebrasProvider(config, settings=settings)
51
+
52
+
53
def _create_silicon(config: ProviderConfig, settings: Settings) -> BaseProvider:
    """Factory: build a Silicon Flow provider instance from *config* and *settings*."""
    # Deferred import — presumably so the provider module is only loaded when
    # this factory actually runs (matches the style of the sibling factories).
    from providers.silicon import SiliconProvider

    return SiliconProvider(config, settings=settings)
57
+
58
+
59
  PROVIDER_FACTORIES: dict[str, ProviderFactory] = {
60
  "nvidia_nim": _create_nvidia_nim,
61
  "zen": _create_zen,
62
+ "cerebras": _create_cerebras,
63
+ "silicon": _create_silicon,
64
  }
65
 
66
  if set(PROVIDER_DESCRIPTORS) != set(SUPPORTED_PROVIDER_IDS) or set(
providers/silicon/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """Silicon Flow provider (OpenAI-compatible /chat/completions)."""
2
+
3
+ from .client import SiliconProvider
4
+
5
+ __all__ = ["SiliconProvider"]
providers/silicon/client.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Silicon Flow provider using OpenAI-compatible API."""
2
+
3
+ from typing import Any
4
+
5
+ from config.settings import Settings
6
+ from core.anthropic import ReasoningReplayMode, build_base_request_body
7
+ from providers.base import ProviderConfig
8
+ from providers.defaults import SILICON_DEFAULT_BASE
9
+ from providers.openai_compat import OpenAIChatTransport
10
+
11
+
12
class SiliconProvider(OpenAIChatTransport):
    """Silicon Flow provider backed by the OpenAI-compatible ``/chat/completions`` API."""

    def __init__(self, config: ProviderConfig, *, settings: Settings):
        # Normalise the upstream endpoint: drop trailing slashes, then make
        # sure the OpenAI-style ``/v1`` path segment is present exactly once.
        endpoint = (config.base_url or SILICON_DEFAULT_BASE).rstrip("/")
        if not endpoint.endswith("/v1"):
            endpoint = f"{endpoint}/v1"
        super().__init__(
            config,
            provider_name="Silicon",
            base_url=endpoint,
            api_key=config.api_key,
        )
        self._settings = settings

    def _build_request_body(
        self, request: Any, thinking_enabled: bool | None = None
    ) -> dict:
        """Build the upstream request payload for *request*.

        Reasoning content is replayed to the upstream only when extended
        thinking is enabled for this request; otherwise replay is disabled.
        """
        if self._is_thinking_enabled(request, thinking_enabled):
            replay = ReasoningReplayMode.REASONING_CONTENT
        else:
            replay = ReasoningReplayMode.DISABLED
        return build_base_request_body(request, reasoning_replay=replay)