Spaces:
Running
Running
| """Embedding generation via OpenAI-compatible API.""" | |
| from __future__ import annotations | |
| import json | |
| import logging | |
| import math | |
| import os | |
| import urllib.request | |
| from typing import List, Optional, Tuple | |
| logger = logging.getLogger("openspace.cloud") | |
| # Constants (duplicated here to avoid top-level import of skill_ranker) | |
| SKILL_EMBEDDING_MODEL = "openai/text-embedding-3-small" | |
| SKILL_EMBEDDING_MAX_CHARS = 12_000 | |
| SKILL_EMBEDDING_DIMENSIONS = 1536 | |
| _OPENROUTER_BASE = "https://openrouter.ai/api/v1" | |
| _OPENAI_BASE = "https://api.openai.com/v1" | |
| def resolve_embedding_api() -> Tuple[Optional[str], str]: | |
| """Resolve API key and base URL for embedding requests. | |
| Priority: | |
| 1. ``OPENROUTER_API_KEY`` β OpenRouter base URL | |
| 2. ``OPENAI_API_KEY`` + ``OPENAI_BASE_URL`` (default ``api.openai.com``) | |
| 3. host-agent config (nanobot / openclaw) | |
| Returns: | |
| ``(api_key, base_url)`` β *api_key* may be ``None`` when no key is found. | |
| """ | |
| or_key = os.environ.get("OPENROUTER_API_KEY") | |
| if or_key: | |
| return or_key, _OPENROUTER_BASE | |
| oa_key = os.environ.get("OPENAI_API_KEY") | |
| if oa_key: | |
| base = os.environ.get("OPENAI_BASE_URL", _OPENAI_BASE).rstrip("/") | |
| return oa_key, base | |
| try: | |
| from openspace.host_detection import get_openai_api_key | |
| host_key = get_openai_api_key() | |
| if host_key: | |
| base = os.environ.get("OPENAI_BASE_URL", _OPENAI_BASE).rstrip("/") | |
| return host_key, base | |
| except Exception: | |
| pass | |
| return None, _OPENAI_BASE | |
| def cosine_similarity(a: List[float], b: List[float]) -> float: | |
| """Compute cosine similarity between two vectors.""" | |
| if len(a) != len(b) or not a: | |
| return 0.0 | |
| dot = sum(x * y for x, y in zip(a, b)) | |
| norm_a = math.sqrt(sum(x * x for x in a)) | |
| norm_b = math.sqrt(sum(x * x for x in b)) | |
| if norm_a == 0 or norm_b == 0: | |
| return 0.0 | |
| return dot / (norm_a * norm_b) | |
| def build_skill_embedding_text( | |
| name: str, | |
| description: str, | |
| readme_body: str, | |
| max_chars: int = SKILL_EMBEDDING_MAX_CHARS, | |
| ) -> str: | |
| """Build text for skill embedding: ``name + description + SKILL.md body``. | |
| Unified strategy matching MCP search_skills and clawhub platform. | |
| """ | |
| header = "\n".join(filter(None, [name, description])) | |
| raw = "\n\n".join(filter(None, [header, readme_body])) | |
| if len(raw) <= max_chars: | |
| return raw | |
| return raw[:max_chars] | |
| def generate_embedding(text: str, api_key: Optional[str] = None) -> Optional[List[float]]: | |
| """Generate embedding using OpenAI-compatible API. | |
| When *api_key* is ``None``, credentials are resolved automatically via | |
| :func:`resolve_embedding_api` (``OPENROUTER_API_KEY`` β ``OPENAI_API_KEY`` | |
| β host-agent config). | |
| This is a **synchronous** call (uses urllib). In async contexts, | |
| wrap with ``asyncio.to_thread()``. | |
| Args: | |
| text: The text to embed. | |
| api_key: Explicit API key. When provided, base URL is still resolved | |
| from environment (``OPENROUTER_API_KEY`` presence determines | |
| the endpoint). | |
| Returns: | |
| Embedding vector, or None on failure. | |
| """ | |
| resolved_key, base_url = resolve_embedding_api() | |
| if api_key is None: | |
| api_key = resolved_key | |
| if not api_key: | |
| return None | |
| body = json.dumps({ | |
| "model": SKILL_EMBEDDING_MODEL, | |
| "input": text, | |
| }).encode("utf-8") | |
| req = urllib.request.Request( | |
| f"{base_url}/embeddings", | |
| data=body, | |
| headers={ | |
| "Content-Type": "application/json", | |
| "Authorization": f"Bearer {api_key}", | |
| }, | |
| method="POST", | |
| ) | |
| try: | |
| with urllib.request.urlopen(req, timeout=15) as resp: | |
| data = json.loads(resp.read().decode("utf-8")) | |
| return data.get("data", [{}])[0].get("embedding") | |
| except Exception as e: | |
| logger.warning("Embedding generation failed: %s", e) | |
| return None | |