oki692 commited on
Commit
a5d12c9
·
verified ·
1 Parent(s): f1a8248

Upload 6 files

Browse files
Files changed (7) hide show
  1. .gitattributes +1 -0
  2. Dockerfile +14 -0
  3. gateway +3 -0
  4. go.mod +3 -0
  5. main.go +502 -0
  6. prompts.go +0 -0
  7. provider.ts +398 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ gateway filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM golang:1.21-alpine AS builder
2
+ WORKDIR /app
3
+ COPY go.mod ./
4
+ COPY *.go ./
5
+ RUN go build -o gateway .
6
+
7
+ FROM alpine:latest
8
+ RUN apk --no-cache add ca-certificates
9
+ WORKDIR /app
10
+ COPY --from=builder /app/gateway .
11
+
12
+ ENV PORT=7860
13
+ EXPOSE 7860
14
+ CMD ["./gateway"]
gateway ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:951e544c947d6e3d9a37eac1e4f1d5bc5e3c8c0616313836a2bd4dba0e00b1e3
3
+ size 9429715
go.mod ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ module gateway
2
+
3
+ go 1.21
main.go ADDED
@@ -0,0 +1,502 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package main
2
+
3
+ import (
4
+ "bufio"
5
+ "bytes"
6
+ "encoding/json"
7
+ "fmt"
8
+ "io"
9
+ "log"
10
+ "net/http"
11
+ "os"
12
+ "sort"
13
+ "strings"
14
+ "time"
15
+ )
16
+
17
// Provider endpoints and credentials.
//
// SECURITY NOTE(review): API keys are hard-coded in source. They should be
// loaded from environment variables (or a secret store) and the committed
// values rotated — anyone with read access to this repository can use them.
const (
	NvidiaBaseURL = "https://integrate.api.nvidia.com/v1"
	NvidiaAPIKey  = "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"
	NovaBaseURL   = "https://api.nova.amazon.com/v1"
	NovaAPIKey    = "fdbdcf6a-a2f3-4201-9488-89f94ea528a3"
	// GatewayAPIKey is the key clients must present to this gateway.
	GatewayAPIKey = "connect"
	// MaxToolIterations bounds the agentic tool-call loop in handleChat.
	MaxToolIterations = 10
)
25
+
26
// modelAliases maps the short, client-facing model names exposed by this
// gateway to the full upstream model identifiers.
var modelAliases = map[string]string{
	"Bielik-11b":            "speakleash/bielik-11b-v2.6-instruct",
	"Mistral-Small-4":       "mistralai/mistral-small-4-119b-2603",
	"DeepSeek-V3.1":         "deepseek-ai/deepseek-v3.1",
	"Kimi-K2":               "moonshotai/kimi-k2-instruct",
	"Amazon-Nova-2-lite-v1": "nova-2-lite-v1",
	"Minimax-m2.5":          "minimaxai/minimax-m2.5",
	"GLM-4.7":               "z-ai/glm4.7",
	"GPT-OSS-120b":          "openai/gpt-oss-120b",
	"Step-3.5-Flash":        "stepfun-ai/step-3.5-flash",
	"Qwen-3.5":              "qwen/qwen3.5-122b-a10b",
	"Kimi-K2.5":             "moonshotai/kimi-k2.5",
}

// novaModels marks upstream model IDs served by the Amazon Nova API
// instead of NVIDIA (see getProviderConfig).
var novaModels = map[string]bool{
	"nova-2-lite-v1": true,
}

// noThinkingModels marks upstream model IDs for which the "thinking" field
// is explicitly disabled in upstream request payloads.
var noThinkingModels = map[string]bool{
	"deepseek-ai/deepseek-v3.1": true,
}
49
+
50
+ func getProviderConfig(modelID string) (baseURL, apiKey string) {
51
+ if novaModels[modelID] {
52
+ return NovaBaseURL, NovaAPIKey
53
+ }
54
+ return NvidiaBaseURL, NvidiaAPIKey
55
+ }
56
+
57
// --- STRUCTURES ---

// Message is a single OpenAI-style chat message. Content is interface{}
// because it may be a plain string or a structured content array;
// ToolCalls is kept opaque for the same reason (forwarded as-is).
type Message struct {
	Role       string      `json:"role"`
	Content    interface{} `json:"content"`
	ToolCallID string      `json:"tool_call_id,omitempty"`
	ToolCalls  interface{} `json:"tool_calls,omitempty"`
	Name       string      `json:"name,omitempty"`
}

// ToolFunction describes a callable tool. The non-standard "x-endpoint"
// extension, when set, is an HTTP URL this gateway POSTs tool arguments to
// in order to execute the tool automatically (see executeToolCall).
type ToolFunction struct {
	Name        string                 `json:"name"`
	Description string                 `json:"description,omitempty"`
	Parameters  map[string]interface{} `json:"parameters,omitempty"`
	Endpoint    string                 `json:"x-endpoint,omitempty"`
}

// Tool wraps a ToolFunction with its OpenAI "type" discriminator.
type Tool struct {
	Type     string       `json:"type"`
	Function ToolFunction `json:"function"`
}

// ChatRequest is the subset of the OpenAI chat-completions request body this
// gateway understands. Pointer fields distinguish "absent" from zero values.
type ChatRequest struct {
	Model       string      `json:"model"`
	Messages    []Message   `json:"messages"`
	Stream      *bool       `json:"stream,omitempty"`
	Tools       []Tool      `json:"tools,omitempty"`
	ToolChoice  interface{} `json:"tool_choice,omitempty"`
	Temperature *float64    `json:"temperature,omitempty"`
	MaxTokens   *int        `json:"max_tokens,omitempty"`
}

// AccumToolCall accumulates a streamed tool call whose name/arguments
// arrive split across multiple SSE delta chunks (see streamUpstream).
type AccumToolCall struct {
	Index int
	ID    string
	Name  string
	Args  string
}
95
+
96
+ // --- POMOCNICZE ---
97
+
98
+ func resolveModel(requested string) string {
99
+ if full, ok := modelAliases[requested]; ok {
100
+ return full
101
+ }
102
+ return requested
103
+ }
104
+
105
+ func findTool(tools []Tool, name string) *Tool {
106
+ for _, t := range tools {
107
+ if t.Function.Name == name {
108
+ return &t
109
+ }
110
+ }
111
+ return nil
112
+ }
113
+
114
+ // executeToolCall wykonuje HTTP POST do x-endpoint narzędzia
115
+ func executeToolCall(tool *Tool, argsJSON string) string {
116
+ if tool == nil || tool.Function.Endpoint == "" {
117
+ return fmt.Sprintf(`{"error":"brak x-endpoint dla narzędzia %s"}`, tool.Function.Name)
118
+ }
119
+
120
+ var args interface{}
121
+ json.Unmarshal([]byte(argsJSON), &args)
122
+ body, _ := json.Marshal(args)
123
+
124
+ client := &http.Client{Timeout: 30 * time.Second}
125
+ resp, err := client.Post(tool.Function.Endpoint, "application/json", bytes.NewReader(body))
126
+ if err != nil {
127
+ return fmt.Sprintf(`{"error":"%s"}`, err.Error())
128
+ }
129
+ defer resp.Body.Close()
130
+ result, _ := io.ReadAll(resp.Body)
131
+ return string(result)
132
+ }
133
+
134
+ // --- UPSTREAM CALL (non-streaming, zbiera pełną odpowiedź) ---
135
+
136
+ func callUpstream(modelID string, messages []Message, tools []Tool, toolChoice interface{}, temperature *float64, maxTokens *int) (map[string]interface{}, error) {
137
+ payload := map[string]interface{}{
138
+ "model": modelID,
139
+ "messages": messages,
140
+ "stream": false,
141
+ }
142
+ if noThinkingModels[modelID] {
143
+ payload["thinking"] = false
144
+ }
145
+ if temperature != nil {
146
+ payload["temperature"] = *temperature
147
+ }
148
+ if maxTokens != nil {
149
+ payload["max_tokens"] = *maxTokens
150
+ }
151
+ if len(tools) > 0 {
152
+ payload["tools"] = tools
153
+ if toolChoice != nil {
154
+ payload["tool_choice"] = toolChoice
155
+ } else {
156
+ payload["tool_choice"] = "auto"
157
+ }
158
+ }
159
+
160
+ baseURL, apiKey := getProviderConfig(modelID)
161
+ body, _ := json.Marshal(payload)
162
+ req, _ := http.NewRequest("POST", baseURL+"/chat/completions", bytes.NewReader(body))
163
+ req.Header.Set("Content-Type", "application/json")
164
+ req.Header.Set("Authorization", "Bearer "+apiKey)
165
+
166
+ client := &http.Client{Timeout: 120 * time.Second}
167
+ resp, err := client.Do(req)
168
+ if err != nil {
169
+ return nil, err
170
+ }
171
+ defer resp.Body.Close()
172
+
173
+ var result map[string]interface{}
174
+ if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
175
+ return nil, err
176
+ }
177
+ return result, nil
178
+ }
179
+
180
// --- STREAMING UPSTREAM (final response) ---

// streamUpstream proxies one streaming chat-completions call as SSE to the
// client. Tool-call fragments are accumulated across delta chunks and
// re-emitted as a single complete tool_calls chunk when upstream signals
// finish_reason "tool_calls"; all other chunks are forwarded with the model
// field rewritten to the client-facing alias (clientModel).
func streamUpstream(w http.ResponseWriter, modelID string, messages []Message, tools []Tool, toolChoice interface{}, temperature *float64, maxTokens *int, clientModel string) {
	payload := map[string]interface{}{
		"model":    modelID,
		"messages": messages,
		"stream":   true,
	}
	if noThinkingModels[modelID] {
		payload["thinking"] = false
	}
	if temperature != nil {
		payload["temperature"] = *temperature
	}
	if maxTokens != nil {
		payload["max_tokens"] = *maxTokens
	}
	if len(tools) > 0 {
		payload["tools"] = tools
		if toolChoice != nil {
			payload["tool_choice"] = toolChoice
		} else {
			// Default so the model may choose to call tools.
			payload["tool_choice"] = "auto"
		}
	}

	baseURL, apiKey := getProviderConfig(modelID)
	// NOTE(review): marshal/request errors are silently ignored here,
	// unlike the checked variants elsewhere — worth tightening.
	body, _ := json.Marshal(payload)
	req, _ := http.NewRequest("POST", baseURL+"/chat/completions", bytes.NewReader(body))
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+apiKey)

	// NOTE(review): http.DefaultClient has no timeout (callUpstream uses
	// 120s); a stalled upstream can hold this handler open indefinitely.
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		http.Error(w, err.Error(), 502)
		return
	}
	defer resp.Body.Close()

	flusher, _ := w.(http.Flusher)
	// NOTE(review): bufio.Scanner's default 64KiB line limit may truncate
	// very large SSE data lines; the provider.ts variant enlarges its
	// buffer to 1MiB — consider doing the same here.
	scanner := bufio.NewScanner(resp.Body)
	accum := make(map[int]*AccumToolCall)

	for scanner.Scan() {
		line := scanner.Text()
		// Pass through non-data lines (blank keep-alives, "event:"
		// lines) and the [DONE] sentinel unchanged.
		if !strings.HasPrefix(line, "data: ") || line == "data: [DONE]" {
			fmt.Fprint(w, line+"\n\n")
			if flusher != nil {
				flusher.Flush()
			}
			continue
		}

		var chunk map[string]interface{}
		if err := json.Unmarshal([]byte(strings.TrimPrefix(line, "data: ")), &chunk); err != nil {
			// Malformed chunk — skip rather than abort the stream.
			continue
		}

		choices, ok := chunk["choices"].([]interface{})
		if !ok || len(choices) == 0 {
			continue
		}

		choice := choices[0].(map[string]interface{})
		delta, _ := choice["delta"].(map[string]interface{})
		if delta == nil {
			continue
		}
		finishReason := choice["finish_reason"]

		// Accumulate tool-call fragments instead of forwarding them;
		// they are re-emitted whole on the finish chunk below.
		if tcs, ok := delta["tool_calls"].([]interface{}); ok {
			for _, tcVal := range tcs {
				tc := tcVal.(map[string]interface{})
				idx := int(tc["index"].(float64))
				acc, exists := accum[idx]
				if !exists {
					acc = &AccumToolCall{Index: idx}
					if id, ok := tc["id"].(string); ok {
						acc.ID = id
					}
					accum[idx] = acc
				}
				if fn, ok := tc["function"].(map[string]interface{}); ok {
					// Name and arguments may arrive in pieces;
					// concatenate each fragment.
					if name, ok := fn["name"].(string); ok {
						acc.Name += name
					}
					if args, ok := fn["arguments"].(string); ok {
						acc.Args += args
					}
				}
			}
			continue
		}

		// End of a tool-call turn: emit one synthetic chunk carrying
		// the fully assembled tool calls, ordered by index.
		if (finishReason == "tool_calls" || finishReason == "function_call") && len(accum) > 0 {
			var keys []int
			for k := range accum {
				keys = append(keys, k)
			}
			sort.Ints(keys)

			finalTools := []map[string]interface{}{}
			for _, k := range keys {
				a := accum[k]
				finalTools = append(finalTools, map[string]interface{}{
					"index": a.Index, "id": a.ID, "type": "function",
					"function": map[string]interface{}{"name": a.Name, "arguments": a.Args},
				})
			}

			response := map[string]interface{}{
				"id": chunk["id"], "object": "chat.completion.chunk", "created": chunk["created"],
				"model": clientModel,
				"choices": []map[string]interface{}{{
					"index":         0,
					"delta":         map[string]interface{}{"role": "assistant", "tool_calls": finalTools},
					"finish_reason": "tool_calls",
				}},
			}
			jsonBytes, _ := json.Marshal(response)
			fmt.Fprintf(w, "data: %s\n\n", string(jsonBytes))
			if flusher != nil {
				flusher.Flush()
			}
			// Reset for a possible further tool-call turn in the
			// same stream.
			accum = make(map[int]*AccumToolCall)
			continue
		}

		// Regular content chunk: rewrite the model field to the client
		// alias and forward.
		chunk["model"] = clientModel
		out, _ := json.Marshal(chunk)
		fmt.Fprintf(w, "data: %s\n\n", string(out))
		if flusher != nil {
			flusher.Flush()
		}
	}

	// NOTE(review): scanner.Err() is not checked; an upstream read error
	// is indistinguishable from a clean end of stream here.
	fmt.Fprint(w, "data: [DONE]\n\n")
	if flusher != nil {
		flusher.Flush()
	}
}
322
+
323
// --- MAIN HANDLER ---

// handleChat implements POST /v1/chat/completions.
//
// Flow: CORS preflight → gateway auth → decode request → (optionally) an
// agentic loop that auto-executes tools declared with "x-endpoint" →
// either a JSON response or an SSE proxy of the upstream stream.
func handleChat(w http.ResponseWriter, r *http.Request) {
	if r.Method == http.MethodOptions {
		w.Header().Set("Access-Control-Allow-Origin", "*")
		w.Header().Set("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
		w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, x-api-key")
		w.WriteHeader(http.StatusNoContent)
		return
	}

	// SECURITY NOTE(review): strings.Contains accepts any Authorization
	// header that merely CONTAINS the key as a substring; an exact match
	// against "Bearer "+GatewayAPIKey would be stricter.
	auth := r.Header.Get("Authorization")
	if !strings.Contains(auth, GatewayAPIKey) && r.Header.Get("x-api-key") != GatewayAPIKey {
		http.Error(w, "Unauthorized", http.StatusUnauthorized)
		return
	}

	var req ChatRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, "Bad Request", http.StatusBadRequest)
		return
	}

	clientModel := req.Model           // alias echoed back to the client
	modelID := resolveModel(req.Model) // upstream model identifier
	messages := req.Messages
	tools := req.Tools
	toolChoice := req.ToolChoice

	// Streaming is the default when the client omits "stream".
	wantStream := req.Stream == nil || *req.Stream

	// --- AGENTIC LOOP ---
	// If any tool declares an x-endpoint, tool calls are executed
	// automatically. Each iteration: non-streaming call → check
	// tool_calls → execute → append results → repeat. The final
	// response (without tool_calls) is returned to the client.

	hasAutoExec := false
	if len(tools) > 0 {
		for _, t := range tools {
			if t.Function.Endpoint != "" {
				hasAutoExec = true
				break
			}
		}
	}

	if hasAutoExec {
		for i := 0; i < MaxToolIterations; i++ {
			result, err := callUpstream(modelID, messages, tools, toolChoice, req.Temperature, req.MaxTokens)
			if err != nil {
				http.Error(w, err.Error(), 502)
				return
			}

			choices, ok := result["choices"].([]interface{})
			if !ok || len(choices) == 0 {
				break
			}

			choice := choices[0].(map[string]interface{})
			message, _ := choice["message"].(map[string]interface{})
			finishReason, _ := choice["finish_reason"].(string)

			// Append the assistant turn to the conversation history.
			assistantMsg := Message{Role: "assistant"}
			if content, ok := message["content"]; ok && content != nil {
				assistantMsg.Content = content
			}
			if tcs, ok := message["tool_calls"]; ok && tcs != nil {
				assistantMsg.ToolCalls = tcs
			}
			messages = append(messages, assistantMsg)

			if finishReason != "tool_calls" && finishReason != "function_call" {
				// No tool calls — return the result to the client.
				// NOTE(review): returned as plain JSON even when the
				// client asked for a stream.
				w.Header().Set("Content-Type", "application/json")
				w.Header().Set("Access-Control-Allow-Origin", "*")
				result["model"] = clientModel
				json.NewEncoder(w).Encode(result)
				return
			}

			// Execute every requested tool call; append each result
			// as a "tool"-role message for the next iteration.
			tcList, _ := message["tool_calls"].([]interface{})
			for _, tcVal := range tcList {
				tc, _ := tcVal.(map[string]interface{})
				if tc == nil {
					continue
				}
				tcID, _ := tc["id"].(string)
				fn, _ := tc["function"].(map[string]interface{})
				if fn == nil {
					continue
				}
				fnName, _ := fn["name"].(string)
				fnArgs, _ := fn["arguments"].(string)

				// findTool may return nil for hallucinated names;
				// executeToolCall is expected to handle that.
				tool := findTool(tools, fnName)
				toolResult := executeToolCall(tool, fnArgs)

				messages = append(messages, Message{
					Role:       "tool",
					Content:    toolResult,
					ToolCallID: tcID,
					Name:       fnName,
				})
			}
		}

		// Iteration cap reached — one final attempt without tools so
		// the model must answer directly.
		result, err := callUpstream(modelID, messages, nil, nil, req.Temperature, req.MaxTokens)
		if err != nil {
			http.Error(w, err.Error(), 502)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		w.Header().Set("Access-Control-Allow-Origin", "*")
		result["model"] = clientModel
		json.NewEncoder(w).Encode(result)
		return
	}

	// --- NORMAL MODE (no auto-exec): stream to the client ---
	// NOTE(review): SSE headers are set even on the non-streaming path
	// below; Content-Type is then overwritten with application/json.
	w.Header().Set("Content-Type", "text/event-stream")
	w.Header().Set("Access-Control-Allow-Origin", "*")
	w.Header().Set("X-Accel-Buffering", "no")
	w.Header().Set("Cache-Control", "no-cache")

	if !wantStream {
		// Client opted out of streaming — collect the response and
		// return plain JSON.
		result, err := callUpstream(modelID, messages, tools, toolChoice, req.Temperature, req.MaxTokens)
		if err != nil {
			http.Error(w, err.Error(), 502)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		result["model"] = clientModel
		json.NewEncoder(w).Encode(result)
		return
	}

	streamUpstream(w, modelID, messages, tools, toolChoice, req.Temperature, req.MaxTokens, clientModel)
}
466
+
467
+ func handleModels(w http.ResponseWriter, r *http.Request) {
468
+ if r.Method == http.MethodOptions {
469
+ w.Header().Set("Access-Control-Allow-Origin", "*")
470
+ w.Header().Set("Access-Control-Allow-Methods", "GET, OPTIONS")
471
+ w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, x-api-key")
472
+ w.WriteHeader(http.StatusNoContent)
473
+ return
474
+ }
475
+ w.Header().Set("Content-Type", "application/json")
476
+ w.Header().Set("Access-Control-Allow-Origin", "*")
477
+ var data []map[string]interface{}
478
+ now := time.Now().Unix()
479
+ for alias := range modelAliases {
480
+ data = append(data, map[string]interface{}{
481
+ "id": alias,
482
+ "object": "model",
483
+ "created": now,
484
+ "owned_by": "nvidia",
485
+ })
486
+ }
487
+ json.NewEncoder(w).Encode(map[string]interface{}{"object": "list", "data": data})
488
+ }
489
+
490
+ func main() {
491
+ port := os.Getenv("PORT")
492
+ if port == "" {
493
+ port = "3000"
494
+ }
495
+ mux := http.NewServeMux()
496
+ mux.HandleFunc("/v1/chat/completions", handleChat)
497
+ mux.HandleFunc("/v1/models", handleModels)
498
+ log.Printf("Gateway running on :%s", port)
499
+ if err := http.ListenAndServe(":"+port, mux); err != nil {
500
+ log.Fatalf("Server error: %v", err)
501
+ }
502
+ }
prompts.go ADDED
The diff for this file is too large to render. See raw diff
 
provider.ts ADDED
@@ -0,0 +1,398 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package main
2
+
3
+ import (
4
+ "bufio"
5
+ "bytes"
6
+ "encoding/json"
7
+ "fmt"
8
+ "io"
9
+ "log"
10
+ "net/http"
11
+ "os"
12
+ "sort"
13
+ "strings"
14
+ "time"
15
+ )
16
+
17
// Provider endpoint and credentials.
//
// SECURITY NOTE(review): the NVIDIA API key is hard-coded in source; it
// should be read from the environment or a secret store and the committed
// value rotated.
const (
	NvidiaBaseURL = "https://integrate.api.nvidia.com/v1"
	NvidiaAPIKey  = "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"
	// GatewayAPIKey is the key clients must present to this gateway.
	GatewayAPIKey = "connect"
)

// modelAliases maps client-facing model names to upstream model IDs.
var modelAliases = map[string]string{
	"Bielik-11b":      "speakleash/bielik-11b-v2.6-instruct",
	"GLM-4.7":         "z-ai/glm4.7",
	"Mistral-Small-4": "mistralai/mistral-small-4-119b-2603",
	"DeepSeek-V3.1":   "deepseek-ai/deepseek-v3.1",
	"Kimi-K2":         "moonshotai/kimi-k2-instruct",
}
30
+
31
// Message is a single OpenAI-style chat message. Content and ToolCalls are
// interface{} because they may be strings or structured arrays and are
// forwarded as-is.
type Message struct {
	Role       string      `json:"role"`
	Content    interface{} `json:"content"`
	ToolCallID string      `json:"tool_call_id,omitempty"`
	ToolCalls  interface{} `json:"tool_calls,omitempty"`
	Name       string      `json:"name,omitempty"`
}

// ChatRequest is the client-facing chat-completions request body. Pointer
// fields distinguish "absent" from zero values.
type ChatRequest struct {
	Model       string        `json:"model"`
	Messages    []Message     `json:"messages"`
	Stream      *bool         `json:"stream,omitempty"`
	Tools       []interface{} `json:"tools,omitempty"`
	ToolChoice  interface{}   `json:"tool_choice,omitempty"`
	Temperature *float64      `json:"temperature,omitempty"`
	MaxTokens   *int          `json:"max_tokens,omitempty"`
	TopP        *float64      `json:"top_p,omitempty"`
	Stop        interface{}   `json:"stop,omitempty"`
}

// UpstreamRequest is the body sent to the upstream API. Stream is a plain
// bool (always set by handleChat); ExtraBody carries provider-specific
// extensions such as chat_template_kwargs.
type UpstreamRequest struct {
	Model       string                 `json:"model"`
	Messages    []Message              `json:"messages"`
	Stream      bool                   `json:"stream"`
	Tools       []interface{}          `json:"tools,omitempty"`
	ToolChoice  interface{}            `json:"tool_choice,omitempty"`
	Temperature *float64               `json:"temperature,omitempty"`
	MaxTokens   *int                   `json:"max_tokens,omitempty"`
	TopP        *float64               `json:"top_p,omitempty"`
	Stop        interface{}            `json:"stop,omitempty"`
	ExtraBody   map[string]interface{} `json:"extra_body,omitempty"`
}

// StreamChoice is one choice inside a streamed chunk. FinishReason is a
// pointer so JSON null/absent is distinguishable from "".
type StreamChoice struct {
	Index        int         `json:"index"`
	Delta        StreamDelta `json:"delta"`
	FinishReason *string     `json:"finish_reason"`
}

// StreamDelta is the incremental payload of a streamed choice.
type StreamDelta struct {
	Role      string          `json:"role,omitempty"`
	Content   *string         `json:"content,omitempty"`
	ToolCalls []ToolCallChunk `json:"tool_calls,omitempty"`
}

// ToolCallChunk is one fragment of a tool call; Name/Arguments may arrive
// split across several chunks that share the same Index.
type ToolCallChunk struct {
	Index    int              `json:"index"`
	ID       string           `json:"id,omitempty"`
	Type     string           `json:"type,omitempty"`
	Function ToolCallFunction `json:"function,omitempty"`
}

// ToolCallFunction is the function part of a (possibly partial) tool call.
type ToolCallFunction struct {
	Name      string `json:"name,omitempty"`
	Arguments string `json:"arguments,omitempty"`
}

// StreamChunk is one parsed SSE "data:" payload from the upstream stream.
type StreamChunk struct {
	ID      string         `json:"id"`
	Object  string         `json:"object"`
	Created int64          `json:"created"`
	Model   string         `json:"model"`
	Choices []StreamChoice `json:"choices"`
}

// AccumulatedToolCall collects a tool call's fields as its fragments
// stream in (see handleChat).
type AccumulatedToolCall struct {
	ID   string
	Type string
	Name string
	Args string
}
102
+
103
+ func resolveModel(requested string) string {
104
+ if full, ok := modelAliases[requested]; ok {
105
+ return full
106
+ }
107
+ for _, full := range modelAliases {
108
+ if full == requested {
109
+ return requested
110
+ }
111
+ }
112
+ return requested
113
+ }
114
+
115
+ func injectSystemPrompt(messages []Message, modelID string) []Message {
116
+ filtered := make([]Message, 0, len(messages))
117
+ for _, m := range messages {
118
+ if m.Role != "system" {
119
+ filtered = append(filtered, m)
120
+ }
121
+ }
122
+ prompt, ok := systemPrompts[modelID]
123
+ if !ok || prompt == "" {
124
+ return filtered
125
+ }
126
+ return append([]Message{{Role: "system", Content: prompt}}, filtered...)
127
+ }
128
+
129
+ func authenticate(r *http.Request) bool {
130
+ auth := r.Header.Get("Authorization")
131
+ if len(auth) > 7 && auth[:7] == "Bearer " && auth[7:] == GatewayAPIKey {
132
+ return true
133
+ }
134
+ return r.Header.Get("x-api-key") == GatewayAPIKey
135
+ }
136
+
137
+ func handleModels(w http.ResponseWriter, r *http.Request) {
138
+ if !authenticate(r) {
139
+ http.Error(w, `{"error":{"message":"Unauthorized"}}`, http.StatusUnauthorized)
140
+ return
141
+ }
142
+ type ModelObj struct {
143
+ ID string `json:"id"`
144
+ Object string `json:"object"`
145
+ Created int64 `json:"created"`
146
+ OwnedBy string `json:"owned_by"`
147
+ }
148
+ type ModelsResponse struct {
149
+ Object string `json:"object"`
150
+ Data []ModelObj `json:"data"`
151
+ }
152
+ models := ModelsResponse{Object: "list"}
153
+ now := time.Now().Unix()
154
+ for alias := range modelAliases {
155
+ models.Data = append(models.Data, ModelObj{ID: alias, Object: "model", Created: now, OwnedBy: "nvidia"})
156
+ }
157
+ w.Header().Set("Content-Type", "application/json")
158
+ json.NewEncoder(w).Encode(models)
159
+ }
160
+
161
+ func handleBaseURL(w http.ResponseWriter, r *http.Request) {
162
+ host := os.Getenv("SPACE_HOST")
163
+ if host == "" {
164
+ host = r.Host
165
+ }
166
+ w.Header().Set("Content-Type", "application/json")
167
+ fmt.Fprintf(w, `{"url":"https://%s/v1"}`, host)
168
+ }
169
+
170
// handleChat implements POST /v1/chat/completions as a streaming proxy.
//
// The client request is always forwarded upstream with stream=true; the
// SSE response is relayed chunk-by-chunk, except that fragmented tool-call
// deltas are accumulated and re-emitted as a single complete tool_calls
// chunk when the upstream reports finish_reason "tool_calls".
func handleChat(w http.ResponseWriter, r *http.Request) {
	if !authenticate(r) {
		http.Error(w, `{"error":{"message":"Unauthorized"}}`, http.StatusUnauthorized)
		return
	}
	if r.Method != http.MethodPost {
		http.Error(w, `{"error":{"message":"Method not allowed"}}`, http.StatusMethodNotAllowed)
		return
	}
	var req ChatRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, `{"error":{"message":"Invalid request body"}}`, http.StatusBadRequest)
		return
	}

	modelID := resolveModel(req.Model)
	req.Messages = injectSystemPrompt(req.Messages, modelID)

	// NOTE(review): the upstream call is always stream=true regardless of
	// the client's req.Stream — non-streaming clients still receive SSE.
	upstream := UpstreamRequest{
		Model:       modelID,
		Messages:    req.Messages,
		Stream:      true,
		Tools:       req.Tools,
		ToolChoice:  req.ToolChoice,
		Temperature: req.Temperature,
		MaxTokens:   req.MaxTokens,
		TopP:        req.TopP,
		Stop:        req.Stop,
	}

	// GLM-4.7 requires thinking disabled via extra_body
	if modelID == "z-ai/glm4.7" {
		upstream.ExtraBody = map[string]interface{}{
			"chat_template_kwargs": map[string]interface{}{
				"enable_thinking": false,
			},
		}
	}

	body, err := json.Marshal(upstream)
	if err != nil {
		http.Error(w, `{"error":{"message":"Failed to marshal request"}}`, http.StatusInternalServerError)
		return
	}

	upstreamReq, err := http.NewRequest(http.MethodPost, NvidiaBaseURL+"/chat/completions", bytes.NewReader(body))
	if err != nil {
		http.Error(w, `{"error":{"message":"Failed to create upstream request"}}`, http.StatusInternalServerError)
		return
	}
	upstreamReq.Header.Set("Content-Type", "application/json")
	upstreamReq.Header.Set("Authorization", "Bearer "+NvidiaAPIKey)
	upstreamReq.Header.Set("Accept", "text/event-stream")

	client := &http.Client{Timeout: 300 * time.Second}
	resp, err := client.Do(upstreamReq)
	if err != nil {
		http.Error(w, fmt.Sprintf(`{"error":{"message":"Upstream error: %s"}}`, err.Error()), http.StatusBadGateway)
		return
	}
	defer resp.Body.Close()

	// Relay upstream error bodies verbatim with the upstream status code.
	if resp.StatusCode != http.StatusOK {
		upstreamBody, _ := io.ReadAll(resp.Body)
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(resp.StatusCode)
		w.Write(upstreamBody)
		return
	}

	w.Header().Set("Content-Type", "text/event-stream")
	w.Header().Set("Cache-Control", "no-cache")
	w.Header().Set("Connection", "keep-alive")
	w.Header().Set("X-Accel-Buffering", "no")
	w.WriteHeader(http.StatusOK)

	flusher, canFlush := w.(http.Flusher)
	scanner := bufio.NewScanner(resp.Body)
	// Enlarge the line buffer to 1 MiB: SSE data lines can exceed
	// bufio.Scanner's 64 KiB default.
	scanner.Buffer(make([]byte, 1024*1024), 1024*1024)

	// Accumulate tool call arguments across chunks
	accumulated := make(map[int]*AccumulatedToolCall)

	// flush writes s to the client and flushes immediately when the
	// ResponseWriter supports it.
	flush := func(s string) {
		fmt.Fprint(w, s)
		if canFlush {
			flusher.Flush()
		}
	}

	for scanner.Scan() {
		line := scanner.Text()

		// Forward non-data lines (keep-alives, comments) untouched.
		if !strings.HasPrefix(line, "data: ") {
			flush(line + "\n")
			continue
		}

		data := strings.TrimPrefix(line, "data: ")

		if data == "[DONE]" {
			flush("data: [DONE]\n\n")
			continue
		}

		var chunk StreamChunk
		if err := json.Unmarshal([]byte(data), &chunk); err != nil {
			// Unparseable data line — forward as-is.
			flush(line + "\n")
			continue
		}

		hasToolCalls := false
		for _, choice := range chunk.Choices {
			if len(choice.Delta.ToolCalls) > 0 {
				hasToolCalls = true
				for _, tc := range choice.Delta.ToolCalls {
					acc, ok := accumulated[tc.Index]
					if !ok {
						acc = &AccumulatedToolCall{}
						accumulated[tc.Index] = acc
					}
					if tc.ID != "" {
						acc.ID = tc.ID
					}
					if tc.Type != "" {
						acc.Type = tc.Type
					}
					if tc.Function.Name != "" {
						acc.Name += tc.Function.Name
					}
					acc.Args += tc.Function.Arguments
				}
			}

			// When finish_reason=tool_calls emit one complete assembled chunk
			if choice.FinishReason != nil && *choice.FinishReason == "tool_calls" {
				// Sort by index for deterministic output
				indices := make([]int, 0, len(accumulated))
				for idx := range accumulated {
					indices = append(indices, idx)
				}
				sort.Ints(indices)

				assembled := make([]map[string]interface{}, 0, len(indices))
				for _, idx := range indices {
					acc := accumulated[idx]
					assembled = append(assembled, map[string]interface{}{
						"index": idx,
						"id":    acc.ID,
						"type":  "function",
						"function": map[string]string{
							"name":      acc.Name,
							"arguments": acc.Args,
						},
					})
				}

				fr := "tool_calls"
				synthetic := map[string]interface{}{
					"id":      chunk.ID,
					"object":  chunk.Object,
					"created": chunk.Created,
					"model":   chunk.Model,
					"choices": []map[string]interface{}{
						{
							"index": choice.Index,
							"delta": map[string]interface{}{
								"role":       "assistant",
								"content":    nil,
								"tool_calls": assembled,
							},
							"finish_reason": fr,
						},
					},
				}
				out, _ := json.Marshal(synthetic)
				flush("data: " + string(out) + "\n\n")
				accumulated = make(map[int]*AccumulatedToolCall)
				// NOTE(review): clearing hasToolCalls here means the
				// original finish chunk is ALSO forwarded below when
				// its own delta carried no tool calls, so clients may
				// see two finish_reason chunks — verify against real
				// clients before changing.
				hasToolCalls = false
			}
		}

		// Forward regular content chunks as-is
		if !hasToolCalls {
			flush("data: " + data + "\n\n")
		}
	}
	// NOTE(review): scanner.Err() is not checked; a mid-stream read error
	// ends the loop silently without a terminating [DONE].
}
358
+
359
+ func loggingMiddleware(next http.HandlerFunc) http.HandlerFunc {
360
+ return func(w http.ResponseWriter, r *http.Request) {
361
+ start := time.Now()
362
+ log.Printf("[%s] %s %s", r.Method, r.URL.Path, r.RemoteAddr)
363
+ next(w, r)
364
+ log.Printf("[%s] %s done in %s", r.Method, r.URL.Path, time.Since(start))
365
+ }
366
+ }
367
+
368
+ func corsMiddleware(next http.HandlerFunc) http.HandlerFunc {
369
+ return func(w http.ResponseWriter, r *http.Request) {
370
+ w.Header().Set("Access-Control-Allow-Origin", "*")
371
+ w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
372
+ w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization, x-api-key")
373
+ if r.Method == http.MethodOptions {
374
+ w.WriteHeader(http.StatusNoContent)
375
+ return
376
+ }
377
+ next(w, r)
378
+ }
379
+ }
380
+
381
+ func main() {
382
+ port := os.Getenv("PORT")
383
+ if port == "" {
384
+ port = "7860"
385
+ }
386
+ mux := http.NewServeMux()
387
+ mux.HandleFunc("/v1/chat/completions", corsMiddleware(loggingMiddleware(handleChat)))
388
+ mux.HandleFunc("/v1/models", corsMiddleware(loggingMiddleware(handleModels)))
389
+ mux.HandleFunc("/v1/base-url", corsMiddleware(handleBaseURL))
390
+ mux.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
391
+ w.WriteHeader(http.StatusOK)
392
+ w.Write([]byte(`{"status":"ok"}`))
393
+ })
394
+ log.Printf("Gateway starting on :%s", port)
395
+ if err := http.ListenAndServe(":"+port, mux); err != nil {
396
+ log.Fatal(err)
397
+ }
398
+ }