# components/LLMs/Mistral.py
"""Thin client for Together AI's Chat Completions API (Mistral-7B)."""

import os
import time
from typing import Dict, List, Optional, Tuple

import requests

# Env vars (set in your HF Space / .env)
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY")
TOGETHER_BASE_URL = os.getenv("TOGETHER_BASE_URL", "https://api.together.xyz/v1")
TOGETHER_MODEL = os.getenv("TOGETHER_MODEL", "mistralai/Mistral-7B-Instruct-v0.3")


class MistralTogetherClient:
    """
    Wrapper around Together AI's Chat Completions API for Mistral-7B.
    """

    def __init__(
        self,
        model: str = TOGETHER_MODEL,
        timeout_s: int = 25,
    ):
        """
        Args:
            model: Together model identifier (defaults to TOGETHER_MODEL).
            timeout_s: per-request timeout in seconds.

        Raises:
            RuntimeError: if TOGETHER_API_KEY is not set in the environment.
        """
        if not TOGETHER_API_KEY:
            raise RuntimeError("Missing TOGETHER_API_KEY env var")
        self.model = model
        self.timeout_s = timeout_s
        self.url = f"{TOGETHER_BASE_URL}/chat/completions"
        self.headers = {
            "Authorization": f"Bearer {TOGETHER_API_KEY}",
            "Content-Type": "application/json",
        }

    def chat(
        self,
        messages: List[Dict[str, str]],
        temperature: float = 0.3,
        max_tokens: int = 512,
    ) -> Tuple[str, Dict]:
        """
        Send chat messages to Together's API and return (text, usage).

        - messages: [{"role": "system"|"user"|"assistant", "content": "..."}]

        Retries once on transient failures only (timeouts, connection
        errors, HTTP 5xx/408/429). Non-transient errors — e.g. 4xx
        auth/validation failures or a malformed response body — are
        raised immediately, since retrying them cannot succeed.

        Returns:
            (text, usage): the first choice's message content and the
            provider's usage dict ({} if absent).

        Raises:
            requests.HTTPError: on non-retryable HTTP client errors.
            RuntimeError: if the upstream is still failing after the retry.
        """
        payload = {
            "model": self.model,
            "messages": messages,
            "temperature": float(temperature),
            "max_tokens": int(max_tokens),
            "stream": False,
        }
        last_err: Optional[Exception] = None
        for attempt in (1, 2):  # one retry on transient errors
            try:
                r = requests.post(
                    self.url,
                    headers=self.headers,
                    json=payload,
                    timeout=self.timeout_s,
                )
                # Treat server errors and throttling/timeout statuses as
                # transient so they go through the retry path below.
                if r.status_code >= 500 or r.status_code in (408, 429):
                    raise RuntimeError(f"Upstream {r.status_code}: {r.text[:200]}")
                # Any remaining 4xx is a permanent client error: fail fast
                # (requests.HTTPError propagates out of the except clauses).
                r.raise_for_status()
                data = r.json()
                text = data["choices"][0]["message"]["content"]
                usage = data.get("usage", {})
                return text, usage
            except (requests.Timeout, requests.ConnectionError, RuntimeError) as e:
                last_err = e
                if attempt == 2:
                    raise
                time.sleep(0.8)  # backoff before retry
        # Defensive: the loop always returns or re-raises on attempt 2,
        # but an explicit raise keeps the Tuple return type honest.
        raise RuntimeError(f"chat() failed after retries: {last_err}")


def build_messages(user_msg: str, system: Optional[str] = None) -> List[Dict[str, str]]:
    """
    Helper to build message arrays for chat().

    Args:
        user_msg: the user's message content.
        system: optional system prompt, prepended when provided.

    Returns:
        A list of {"role": ..., "content": ...} dicts ready for chat().
    """
    msgs: List[Dict[str, str]] = []
    if system:
        msgs.append({"role": "system", "content": system})
    msgs.append({"role": "user", "content": user_msg})
    return msgs