| | import json |
| | import struct |
| | import gzip |
| | import time |
| | import uuid |
| | import os |
| | import re |
| | import asyncio |
| | import hashlib |
| | import queue |
| | from concurrent.futures import ThreadPoolExecutor |
| | from typing import List, Optional, Dict, Any |
| | from fastapi import FastAPI, Request, HTTPException, Depends |
| | from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials |
| | from fastapi.responses import StreamingResponse |
| | from curl_cffi import requests |
| | import uvicorn |
| |
|
# FastAPI application and bearer-token extraction for the OpenAI-compatible API.
app = FastAPI()
security = HTTPBearer()

# Path to the browser-exported cookie jar (JSON list of {name, value} objects).
COOKIES_PATH = os.environ.get("COOKIES_PATH", "cookies.json")
# Optional outbound proxy URL; None disables proxying.
PROXY = os.environ.get("HTTP_PROXY", None)

# Shared thread pool for blocking curl_cffi calls driven from async handlers.
_executor = ThreadPoolExecutor(max_workers=16)
| |
|
| |
|
| | def _load_cookies(path: str) -> dict: |
| | try: |
| | with open(path, 'r', encoding='utf-8') as f: |
| | cookies_list = json.load(f) |
| | return {c['name']: c['value'] for c in cookies_list} |
| | except Exception as e: |
| | print(f"Error loading cookies: {e}") |
| | return {} |
| |
|
| |
|
| | def _generate_device_id(seed: str) -> str: |
| | h = hashlib.sha256(seed.encode()).hexdigest() |
| | return str(int(h[:16], 16))[:19] |
| |
|
| |
|
| | def _generate_session_id(seed: str) -> str: |
| | h = hashlib.sha256(("session-" + seed).encode()).hexdigest() |
| | return str(int(h[:16], 16))[:19] |
| |
|
| |
|
def pack_connect_message(data: dict) -> bytes:
    """Serialize *data* as a Connect frame: flag byte 0, big-endian u32
    payload length, then compact UTF-8 JSON."""
    body = json.dumps(data, separators=(',', ':')).encode('utf-8')
    return struct.pack('>BI', 0, len(body)) + body
| |
|
| |
|
| | def _convert_citations(text: str) -> str: |
| | """将 Kimi 的 [^N^] 引用格式转换为 [N]""" |
| | return re.sub(r'\[\^(\d+)\^\]', r'[\1]', text) |
| |
|
| |
|
| | def _format_references(refs: list) -> str: |
| | """将搜索引用格式化为 markdown 脚注""" |
| | if not refs: |
| | return "" |
| | lines = ["\n\n---", "**Sources:**"] |
| | for ref in refs: |
| | base = ref.get("base", {}) |
| | title = base.get("title", "") |
| | url = base.get("url", "") |
| | ref_id = ref.get("id", "") |
| | if title and url: |
| | lines.append(f"[{ref_id}] [{title}]({url})") |
| | return "\n".join(lines) + "\n" |
| |
|
| |
|
| | |
| |
|
| | def _parse_kimi_frames(buffer: bytes): |
| | """解析 connect 帧,返回 (events, remaining_buffer)。 |
| | event 类型: |
| | - {"type": "text", "content": "..."} |
| | - {"type": "tool_status", "name": "...", "status": "..."} |
| | - {"type": "search_refs", "refs": [...]} |
| | - {"type": "done"} |
| | """ |
| | events = [] |
| | while len(buffer) >= 5: |
| | flag, length = struct.unpack_from('>BI', buffer, 0) |
| | if len(buffer) < 5 + length: |
| | break |
| | payload_bytes = buffer[5:5 + length] |
| | buffer = buffer[5 + length:] |
| |
|
| | if flag == 2: |
| | try: |
| | payload_bytes = gzip.decompress(payload_bytes) |
| | except: |
| | pass |
| |
|
| | if flag not in (0, 2): |
| | continue |
| |
|
| | try: |
| | data = json.loads(payload_bytes.decode('utf-8')) |
| | except Exception as e: |
| | print(f"DEBUG: Error decoding frame JSON: {e}") |
| | continue |
| |
|
| | |
| | if "done" in data: |
| | events.append({"type": "done"}) |
| | continue |
| |
|
| | |
| | if "heartbeat" in data: |
| | continue |
| |
|
| | op = data.get("op") |
| | if op not in ("set", "append"): |
| | continue |
| |
|
| | |
| | if "block" in data and "text" in data["block"]: |
| | content = data["block"]["text"].get("content", "") |
| | if content: |
| | events.append({"type": "text", "content": content}) |
| |
|
| | |
| | if "message" in data and "blocks" in data.get("message", {}): |
| | msg_role = data["message"].get("role", "") |
| | if msg_role == "assistant": |
| | content = "" |
| | for block in data["message"]["blocks"]: |
| | if "text" in block: |
| | content += block["text"].get("content", "") |
| | if content: |
| | events.append({"type": "text", "content": content}) |
| |
|
| | |
| | if "block" in data and "tool" in data["block"]: |
| | tool = data["block"]["tool"] |
| | name = tool.get("name", "") |
| | status = tool.get("status", "") |
| | if name and status: |
| | events.append({"type": "tool_status", "name": name, "status": status}) |
| |
|
| | |
| | msg = data.get("message", {}) |
| | refs = msg.get("refs", {}) |
| | if "usedSearchChunks" in refs: |
| | events.append({"type": "search_refs", "refs": refs["usedSearchChunks"]}) |
| |
|
| | return events, buffer |
| |
|
| |
|
| | |
# Static bearer token that selects the local cookie-jar credentials; any
# other key presented by a client is forwarded to Kimi as the auth token.
API_KEY = "sk-sseworld-kimi"
| |
|
| | |
| |
|
class KimiBridge:
    """Builds browser-impersonating curl_cffi sessions for the Kimi web API."""

    def __init__(self):
        # All requests target the public Kimi web frontend.
        self.base_url = "https://www.kimi.com"

    def create_session(self, api_key: str):
        """Return a configured ``requests.Session`` for *api_key*.

        The well-known local API key maps to the cookie-jar auth token with a
        fixed fingerprint seed; any other key is treated as a Kimi auth token
        directly and seeds its own device/session fingerprint.
        """
        if api_key == API_KEY:
            jar = _load_cookies(COOKIES_PATH)
            token = jar.get("kimi-auth", "")
            seed = "cookies-default"
        else:
            jar = {}
            token = api_key
            seed = api_key

        device_id = _generate_device_id(seed)
        session_id = _generate_session_id(seed)

        # Header set mirrors what the Kimi web client sends.
        headers = {
            "accept": "*/*",
            "accept-language": "zh-CN,zh;q=0.9",
            "authorization": f"Bearer {token}",
            "content-type": "application/connect+json",
            "connect-protocol-version": "1",
            "origin": "https://www.kimi.com",
            "referer": "https://www.kimi.com/",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36",
            "x-language": "zh-CN",
            "x-msh-device-id": device_id,
            "x-msh-platform": "web",
            "x-msh-session-id": session_id,
            "x-msh-version": "1.0.0",
            "x-traffic-id": f"u{device_id[:20]}",
        }

        return requests.Session(
            headers=headers,
            cookies=jar,
            impersonate="chrome124",
            proxy=PROXY,
        )
| |
|
| |
|
# Module-level singleton used by the request handlers below.
bridge = KimiBridge()
| |
|
| |
|
| | |
| |
|
def format_openai_stream_chunk(content: str, model: str, chat_id: str, *, role: Optional[str] = None, finish_reason: Optional[str] = None) -> str:
    """Render one OpenAI SSE line: ``data: {chunk-json}`` plus a blank line.

    ``role`` is included in the delta only on the first chunk of a stream;
    ``finish_reason`` only on the terminal chunk. (Both were annotated as
    plain ``str`` despite defaulting to None — fixed to ``Optional[str]``.)
    Empty ``content`` produces an empty delta, as OpenAI's terminal chunk does.
    """
    delta = {}
    if role:
        delta["role"] = role
    if content:
        delta["content"] = content
    chunk = {
        "id": chat_id,
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": model,
        "choices": [{
            "index": 0,
            "delta": delta,
            "finish_reason": finish_reason
        }]
    }
    return f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
| |
|
| |
|
| | |
| |
|
| | def _sync_kimi_request(session, url, body_bytes): |
| | return session.post(url, data=body_bytes, stream=True, timeout=30) |
| |
|
| |
|
def _sync_read_all(response):
    """Synchronously drain a streaming Kimi response into one string.

    Returns the concatenated assistant text with [^N^] citations rewritten
    to [N] and, when search references were reported, a markdown "Sources"
    footer appended. (The previous docstring claimed a (text, refs) tuple;
    the function returns only the string.)
    """
    full_content = ""
    search_refs = []
    buffer = b""
    for chunk in response.iter_content(chunk_size=None):
        if not chunk:
            continue
        # Feed raw bytes to the frame parser; it hands back any partial frame.
        buffer += chunk
        events, buffer = _parse_kimi_frames(buffer)
        for ev in events:
            if ev["type"] == "text":
                full_content += ev["content"]
            elif ev["type"] == "search_refs":
                # Later refs frames replace earlier ones.
                search_refs = ev["refs"]
    full_content = _convert_citations(full_content)
    if search_refs:
        full_content += _format_references(search_refs)
    return full_content
| |
|
| |
|
| | |
| |
|
@app.middleware("http")
async def log_requests(request: Request, call_next):
    # Debug middleware: log every request's method/URL and its response status.
    print(f"DEBUG: Incoming request: {request.method} {request.url}")
    response = await call_next(request)
    print(f"DEBUG: Response status: {response.status_code}")
    return response
| |
|
| |
|
# Supported model ids, each mapped to a Kimi scenario plus thinking-mode flag.
KIMI_MODELS = {
    "kimi-k2.5": {"scenario": "SCENARIO_K2D5", "thinking": False},
    "kimi-k2.5-thinking": {"scenario": "SCENARIO_K2D5", "thinking": True},
}
# Fallback when a client requests an unknown model id.
DEFAULT_MODEL = "kimi-k2.5"
| |
|
| |
|
@app.get("/v1/models")
async def list_models():
    """OpenAI-compatible listing of the model ids this bridge exposes."""
    models = [
        {"id": model_id, "object": "model", "created": 0, "owned_by": "moonshot"}
        for model_id in KIMI_MODELS
    ]
    return {"object": "list", "data": models}
| |
|
| |
|
@app.post("/v1/chat/completions")
async def chat_completions(request: Request, credentials: HTTPAuthorizationCredentials = Depends(security)):
    """OpenAI-compatible chat completion endpoint proxied to Kimi.

    Validates the request, flattens the OpenAI ``messages`` history into a
    single role-prefixed Kimi message, and forwards it over a blocking
    curl_cffi session running on the thread pool. Returns an SSE stream of
    ``chat.completion.chunk`` events when ``stream`` is true, otherwise a
    single ``chat.completion`` JSON object.

    Raises HTTPException 400 on bad input, 500 on connection failure, and
    mirrors Kimi's status code on upstream errors.
    """
    api_key = credentials.credentials
    print(f"DEBUG: chat_completions endpoint hit, key prefix: {api_key[:6]}...")
    print(f"DEBUG: Request headers: {dict(request.headers)}")

    try:
        body = await request.json()
    except Exception as e:
        print(f"DEBUG: Failed to parse request JSON: {e}")
        raise HTTPException(status_code=400, detail="Invalid JSON body")

    messages = body.get("messages", [])
    model = body.get("model", "kimi-k2.5")
    stream = body.get("stream", False)
    model_config = KIMI_MODELS.get(model, KIMI_MODELS[DEFAULT_MODEL])

    print(f"DEBUG: Received request: model={model}, thinking={model_config['thinking']}, stream={stream}, messages_count={len(messages)}")

    if not messages:
        raise HTTPException(status_code=400, detail="Messages are required")

    # Create the upstream session only after validation, so 400 responses
    # never leak an unclosed session (the original created it first).
    session = bridge.create_session(api_key)

    # Kimi's endpoint takes a single message, so flatten the history into
    # one role-prefixed transcript block per original message.
    kimi_blocks = []
    for msg in messages:
        role = msg.get("role", "user")
        content = msg.get("content", "")
        prefix = "User: " if role == "user" else "Assistant: "
        kimi_blocks.append({"message_id": "", "text": {"content": f"{prefix}{content}\n"}})

    kimi_payload = {
        "scenario": model_config["scenario"],
        "tools": [{"type": "TOOL_TYPE_SEARCH", "search": {}}],
        "message": {
            "role": "user",
            "blocks": kimi_blocks,
            "scenario": model_config["scenario"]
        },
        "options": {"thinking": model_config["thinking"]}
    }

    print(f"DEBUG: Kimi payload size: {len(json.dumps(kimi_payload))}")

    url = f"{bridge.base_url}/apiv2/kimi.gateway.chat.v1.ChatService/Chat"
    body_bytes = pack_connect_message(kimi_payload)

    print(f"DEBUG: Forwarding to Kimi: {url}")

    # get_running_loop() replaces the deprecated get_event_loop() in coroutines.
    loop = asyncio.get_running_loop()

    try:
        response = await loop.run_in_executor(_executor, _sync_kimi_request, session, url, body_bytes)
        print(f"DEBUG: Kimi response status: {response.status_code}")
    except Exception as e:
        print(f"DEBUG: Request to Kimi failed: {e}")
        session.close()
        raise HTTPException(status_code=500, detail=f"Failed to connect to Kimi: {str(e)}")

    if response.status_code != 200:
        error_text = response.text
        print(f"DEBUG: Kimi error: {error_text}")
        session.close()
        raise HTTPException(status_code=response.status_code, detail=f"Kimi API error: {error_text}")

    chat_id = str(uuid.uuid4())

    if stream:
        async def generate():
            # Bridge the blocking frame reader to this async generator via a queue.
            q = queue.Queue()
            sentinel = object()
            sent_role = False

            def _stream_worker():
                # Runs on the thread pool: parse frames and push text pieces.
                try:
                    buf = b""
                    search_refs = []
                    for chunk in response.iter_content(chunk_size=None):
                        if not chunk:
                            continue
                        buf += chunk
                        events, buf = _parse_kimi_frames(buf)
                        for ev in events:
                            if ev["type"] == "text":
                                q.put(("text", _convert_citations(ev["content"])))
                            elif ev["type"] == "tool_status" and ev["status"] == "STATUS_RUNNING":
                                q.put(("text", "\n\n> [Searching...]\n\n"))
                            elif ev["type"] == "search_refs":
                                search_refs = ev["refs"]
                    if search_refs:
                        q.put(("text", _format_references(search_refs)))
                finally:
                    # Always unblock the consumer, even if parsing raised.
                    q.put(sentinel)
                    session.close()

            loop.run_in_executor(_executor, _stream_worker)

            while True:
                try:
                    # Short timeout keeps the event loop responsive; a bare
                    # except here previously swallowed every exception.
                    item = await loop.run_in_executor(None, q.get, True, 0.5)
                except queue.Empty:
                    continue
                if item is sentinel:
                    break
                _, content = item
                if not sent_role:
                    # First chunk carries the assistant role, per OpenAI's protocol.
                    yield format_openai_stream_chunk(content, model, chat_id, role="assistant")
                    sent_role = True
                else:
                    yield format_openai_stream_chunk(content, model, chat_id)

            yield format_openai_stream_chunk("", model, chat_id, finish_reason="stop")
            yield "data: [DONE]\n\n"

        return StreamingResponse(generate(), media_type="text/event-stream")
    else:
        try:
            full_content = await loop.run_in_executor(_executor, _sync_read_all, response)
        finally:
            session.close()

        return {
            "id": chat_id,
            "object": "chat.completion",
            "created": int(time.time()),
            "model": model,
            "choices": [{
                "index": 0,
                "message": {"role": "assistant", "content": full_content},
                "finish_reason": "stop"
            }],
            "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
        }
| |
|
| |
|
if __name__ == "__main__":
    # uvicorn is already imported at module level; the redundant local
    # re-import is removed. The import string assumes this file is openai.py.
    uvicorn.run("openai:app", host="0.0.0.0", port=8001, reload=False, log_level="debug")
| |
|