| """ | |
| Ollama client wrapper for MiniMax-M2. | |
| Compatible with /api/generate streaming endpoint. | |
| """ | |
import json
import os
from typing import Dict, Optional

import requests
| OLLAMA_API_URL = os.getenv("OLLAMA_API_URL", "http://127.0.0.1:11434") | |
| OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "minimax-m2:cloud") | |
| DEFAULT_TEMPERATURE = float(os.getenv("DEFAULT_TEMPERATURE", 0.2)) | |
| MAX_TOKENS = int(os.getenv("MAX_TOKENS", 1024)) | |
| def ask_ollama(prompt: str, extra: Dict = None) -> str: | |
| """Send a prompt to the local Ollama API and return the generated text.""" | |
| payload = { | |
| "model": OLLAMA_MODEL, | |
| "prompt": prompt, | |
| "options": { | |
| "temperature": DEFAULT_TEMPERATURE, | |
| "num_predict": MAX_TOKENS | |
| } | |
| } | |
| if extra: | |
| payload.update(extra) | |
| url = f"{OLLAMA_API_URL}/api/generate" | |
| resp = requests.post(url, json=payload, stream=True, timeout=180) | |
| resp.raise_for_status() | |
| response_text = "" | |
| for line in resp.iter_lines(): | |
| if not line: | |
| continue | |
| try: | |
| data = json.loads(line.decode("utf-8")) | |
| if "response" in data: | |
| response_text += data["response"] | |
| except Exception: | |
| continue | |
| return response_text.strip() | |