| """Curated Qwen model catalog for the OpenCode OpenEnv server. | |
| Lives in the server (not the primitive) because routing decisions — | |
| which HF router backend to pick for a given Qwen repo, what counts as | |
| the "default" model, whether a model supports thinking — are | |
| deployment concerns, not harness concerns. The primitive remains | |
| provider-agnostic; this catalog is what the Gradio UI and the MCP | |
| tools consult to turn a UI selection into a concrete | |
| ``(base_url, api_key, model_string, disable_thinking)`` quadruple. | |
| Backends supported: | |
| - ``vllm`` — user-supplied OpenAI-compatible endpoint (e.g. cloudflared | |
| tunnel to ``vllm serve``, or a colocated vLLM server). | |
| - ``hf_router`` — Hugging Face Inference Providers router at | |
| ``https://router.huggingface.co/v1``. Auth via ``HF_TOKEN``. | |
| Model id carries a ``:provider`` suffix to pick the HF | |
| backend (``:together``, ``:scaleway``, ``:nscale``, ...). | |
| Only HF providers verified to return ``logprobs`` are listed (see | |
| ``DOCS/HF/hf_inference_providers_logprobs.md``). | |
| """ | |

from __future__ import annotations

from typing import Literal

from pydantic import BaseModel

BackendKind = Literal["vllm", "hf_router"]

HF_ROUTER_BASE_URL = "https://router.huggingface.co/v1"
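
# Downstream consumption sketch (illustrative, not wired up in this
# module): an ``hf_router`` entry is meant to be used with an
# OpenAI-compatible client pointed at HF_ROUTER_BASE_URL, with the
# ``:provider`` suffix baked into the model id. Roughly:
#
#   import os
#   from openai import OpenAI
#
#   client = OpenAI(base_url=HF_ROUTER_BASE_URL, api_key=os.environ["HF_TOKEN"])
#   client.chat.completions.create(
#       model="Qwen/Qwen3-Coder-30B-A3B-Instruct:scaleway",
#       messages=[{"role": "user", "content": "hi"}],
#   )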


class CatalogModel(BaseModel):
    """One model entry in the curated Qwen catalog."""

    #: Canonical HF-Hub repo id (no ``:provider`` suffix).
    repo: str
    #: Backend kind — drives routing + auth shape.
    backend: BackendKind
    #: For ``hf_router`` entries, the ``:<provider>`` suffix HF uses to
    #: force a specific backend inference provider. Empty for ``vllm``.
    hf_route: str = ""
    #: Whether this model supports Qwen-style thinking mode.
    supports_thinking: bool = False
    #: Short human-readable label for UI dropdowns.
    label: str = ""

    def dropdown_key(self) -> str:
        """Stable unique key for UI selectors."""
        if self.backend == "hf_router":
            return f"hf-router://{self.repo}{self.hf_route}"
        return f"vllm://{self.repo}"

    def opencode_model_string(self) -> str:
        """Model id opencode should send to the endpoint.

        For HF router we bake the ``:provider`` suffix into the model
        string so the HF router picks the right backend.
        """
        if self.backend == "hf_router":
            return f"{self.repo}{self.hf_route}"
        return self.repo
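
# Worked example (values traced from the two methods above; the entry
# itself is hypothetical but mirrors a real catalog row below):
#
#   m = CatalogModel(
#       repo="Qwen/Qwen3-4B-Instruct-2507",
#       backend="hf_router",
#       hf_route=":nscale",
#   )
#   m.dropdown_key()          == "hf-router://Qwen/Qwen3-4B-Instruct-2507:nscale"
#   m.opencode_model_string() == "Qwen/Qwen3-4B-Instruct-2507:nscale"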

# Ordered: self-hosted vLLM first (default), then HF router options.
CATALOG: list[CatalogModel] = [
    # --- Local vLLM (tunneled or colocated) ---
    CatalogModel(
        repo="Qwen/Qwen3.5-4B",
        backend="vllm",
        supports_thinking=True,
        label="Qwen3.5-4B (self-hosted vLLM)",
    ),
    # --- HF Inference Router (Together / Scaleway / Nscale) ---
    CatalogModel(
        repo="Qwen/Qwen3.5-397B-A17B",
        backend="hf_router",
        hf_route=":together",
        supports_thinking=True,
        label="Qwen3.5-397B-A17B — HF/Together",
    ),
    CatalogModel(
        repo="Qwen/Qwen3.5-397B-A17B",
        backend="hf_router",
        hf_route=":scaleway",
        supports_thinking=True,
        label="Qwen3.5-397B-A17B — HF/Scaleway",
    ),
    CatalogModel(
        repo="Qwen/Qwen3-Coder-480B-A35B-Instruct",
        backend="hf_router",
        hf_route=":together",
        supports_thinking=False,
        label="Qwen3-Coder-480B — HF/Together",
    ),
    CatalogModel(
        repo="Qwen/Qwen3-235B-A22B-Instruct-2507",
        backend="hf_router",
        hf_route=":nscale",
        supports_thinking=False,
        label="Qwen3-235B-A22B-2507 — HF/Nscale",
    ),
    CatalogModel(
        repo="Qwen/Qwen3-4B-Instruct-2507",
        backend="hf_router",
        hf_route=":nscale",
        supports_thinking=False,
        label="Qwen3-4B-Instruct-2507 — HF/Nscale",
    ),
    CatalogModel(
        repo="Qwen/Qwen3-Coder-30B-A3B-Instruct",
        backend="hf_router",
        hf_route=":scaleway",
        supports_thinking=False,
        label="Qwen3-Coder-30B-A3B — HF/Scaleway",
    ),
]
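
# How a UI might consume the catalog (a sketch; the actual Gradio wiring
# lives in the server app, and the (label, value) pair shape is an
# assumption about that wiring):
#
#   choices = [(m.label, m.dropdown_key()) for m in CATALOG]
#   # e.g. ("Qwen3.5-4B (self-hosted vLLM)", "vllm://Qwen/Qwen3.5-4B")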


def by_key(key: str) -> CatalogModel:
    """Look up a catalog entry by ``dropdown_key``.

    Falls back to synthesising an ad-hoc entry from the key's prefix so
    users can enter a custom vLLM model id or a custom HF-router model
    id without editing the catalog:

    - ``"vllm://<repo>"`` → ad-hoc vllm entry with ``repo`` as the model id.
    - ``"hf-router://<repo>[:<provider>]"`` → ad-hoc hf_router entry; the
      provider suffix (if present) is preserved verbatim in ``hf_route``.
    """
    for m in CATALOG:
        # dropdown_key is a method, not an attribute — it must be called.
        if m.dropdown_key() == key:
            return m
    if key.startswith("vllm://"):
        repo = key[len("vllm://"):].strip()
        if not repo:
            raise KeyError(f"missing model id in key: {key!r}")
        return CatalogModel(
            repo=repo, backend="vllm", supports_thinking=False,
            label=f"{repo} (custom vLLM)",
        )
    if key.startswith("hf-router://"):
        rest = key[len("hf-router://"):].strip()
        if not rest:
            raise KeyError(f"missing model id in key: {key!r}")
        if ":" in rest:
            repo, _, suffix = rest.partition(":")
            hf_route = ":" + suffix
        else:
            repo, hf_route = rest, ""
        return CatalogModel(
            repo=repo, backend="hf_router", hf_route=hf_route,
            supports_thinking=False,
            label=f"{repo}{hf_route} (custom HF Router)",
        )
    raise KeyError(f"unknown model key: {key!r}")
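
# Fallback synthesis examples (traced from ``by_key`` above; the repo ids
# and the ``:groq`` provider suffix are hypothetical placeholders):
#
#   by_key("vllm://my-org/my-model")
#       -> repo="my-org/my-model", backend="vllm"
#   by_key("hf-router://my-org/my-model:groq")
#       -> repo="my-org/my-model", backend="hf_router", hf_route=":groq"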


def default_model() -> CatalogModel:
    """First entry (self-hosted vLLM 4B)."""
    return CATALOG[0]


def resolve_endpoint(
    model_key: str,
    *,
    vllm_url: str = "",
    hf_token: str = "",
) -> tuple[str, str, str, CatalogModel]:
    """Translate a UI selection into ``(base_url, api_key, model_string, entry)``.

    Raises ``ValueError`` with a clear message when a required secret is
    missing so the UI can render a precise "please fill in X" message.
    """
    m = by_key(model_key)
    if m.backend == "vllm":
        vllm_url = (vllm_url or "").strip()
        if not vllm_url:
            raise ValueError(
                f"model {m.dropdown_key()!r} requires a vLLM base URL "
                "(the tunneled or in-cluster /v1 endpoint)."
            )
        # Normalise to a /v1 base so callers can paste either form.
        base = vllm_url.rstrip("/")
        if not base.endswith("/v1"):
            base = base + "/v1"
        return base, "anything", m.opencode_model_string(), m
    if m.backend == "hf_router":
        hf_token = (hf_token or "").strip()
        if not hf_token:
            raise ValueError(
                f"model {m.dropdown_key()!r} requires an HF token "
                "(hf_... from https://huggingface.co/settings/tokens)."
            )
        return HF_ROUTER_BASE_URL, hf_token, m.opencode_model_string(), m
    raise ValueError(f"unknown backend: {m.backend}")
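

if __name__ == "__main__":
    # Minimal smoke check of the happy path (a sketch: the localhost URL
    # stands in for a real tunneled or colocated vLLM endpoint; nothing
    # is contacted, resolve_endpoint only builds the tuple).
    key = default_model().dropdown_key()
    base, api_key, model, entry = resolve_endpoint(
        key, vllm_url="http://localhost:8000"
    )
    print(base, api_key, model, entry.supports_thinking)
    # -> http://localhost:8000/v1 anything Qwen/Qwen3.5-4B True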