Files changed (3) hide show
  1. Dockerfile +40 -0
  2. README.md +10 -11
  3. hunyuan2api.go +1370 -0
Dockerfile ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# --- Stage 1: build ---
# Official Go image; the alpine variant keeps the build image small.
FROM golang:1.22-alpine AS builder
# Fallback if musl libc causes issues: FROM golang:1.22

# Working directory for the build stage.
WORKDIR /build

# Copy the single-file Go source into the build context.
COPY hunyuan2api.go .

# Build a static binary:
#   CGO_ENABLED=0  -> static link, no libc dependency (needed on alpine)
#   -ldflags="-w -s" -> strip debug info to shrink the binary
#   -o /app/hunyuan2api -> output path
RUN CGO_ENABLED=0 go build -ldflags="-w -s" -o /app/hunyuan2api hunyuan2api.go

# --- Stage 2: runtime ---
# Minimal base image for the compiled binary.
FROM alpine:latest
# If the static build misbehaves on musl, switch to a glibc base:
# FROM debian:stable-slim

# CA certificates are required so outbound HTTPS calls to the Hunyuan
# API can verify the server certificate (the bare alpine image may not
# ship a usable CA bundle for Go's cert pool).
RUN apk add --no-cache ca-certificates

# Working directory for the runtime stage.
WORKDIR /app

# Copy the compiled binary from the builder stage.
COPY --from=builder /app/hunyuan2api /app/hunyuan2api

# Ensure the binary is executable.
RUN chmod +x /app/hunyuan2api

# Port the Go service listens on (matches the --port argument below).
EXPOSE 6677

# Container entrypoint; arguments must match the intended deployment.
CMD ["/app/hunyuan2api", "--address", "0.0.0.0", "--port", "6677", "--verify-ssl=false", "--dev", "--workers", "400", "--queue-size", "1000", "--max-concurrent", "400"]
README.md CHANGED
@@ -1,11 +1,10 @@
1
- ---
2
- title: Hunyuan2api
3
- emoji: 🌍
4
- colorFrom: indigo
5
- colorTo: red
6
- sdk: docker
7
- pinned: false
8
- license: gpl-3.0
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Hunyuan2api # 标题
3
+ emoji: 🌍 # Emoji
4
+ colorFrom: indigo # 渐变起始色
5
+ colorTo: red # 渐变结束色
6
+ sdk: docker # 指定使用 Docker SDK
7
+ app_port: 6677 # 【新增】指定应用程序在容器内监听的端口
8
+ pinned: false # 是否固定在个人资料页
9
+ license: gpl-3.0 # 开源许可证
10
+ ---
 
hunyuan2api.go ADDED
@@ -0,0 +1,1370 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ package main
2
+
3
+ import (
4
+ "bufio"
5
+ "bytes"
6
+ "context"
7
+ "crypto/tls"
8
+ "encoding/json"
9
+ "flag"
10
+ "fmt"
11
+ "io"
12
+ "log"
13
+ "net/http"
14
+ "os"
15
+ "os/signal"
16
+ "strings"
17
+ "sync"
18
+ "sync/atomic"
19
+ "syscall"
20
+ "time"
21
+ )
22
+
23
// WorkerPool manages a fixed set of worker goroutines that consume
// request-processing tasks from a bounded queue.
type WorkerPool struct {
	taskQueue       chan *Task     // buffered queue of pending tasks
	workerCount     int            // number of worker goroutines
	shutdownChannel chan struct{}  // closed to tell workers to exit
	wg              sync.WaitGroup // tracks running workers for Shutdown
}

// Task bundles everything a worker needs to serve one HTTP request.
type Task struct {
	r          *http.Request       // inbound client request
	w          http.ResponseWriter // writer for the same request
	done       chan struct{}       // closed by the worker when processing finishes
	reqID      string              // correlation ID used in log lines
	isStream   bool                // true when the client asked for an SSE stream
	hunyuanReq HunyuanRequest      // upstream request payload built by the handler
}
40
+
41
// NewWorkerPool creates a pool with workerCount workers and a task
// queue of capacity queueSize, and starts the workers immediately.
func NewWorkerPool(workerCount int, queueSize int) *WorkerPool {
	pool := &WorkerPool{
		taskQueue:       make(chan *Task, queueSize),
		workerCount:     workerCount,
		shutdownChannel: make(chan struct{}),
	}

	pool.Start()
	return pool
}
52
+
53
// Start launches the pool's worker goroutines. Each worker loops,
// pulling tasks from taskQueue and dispatching them to the streaming
// or non-streaming handler, until the queue is closed or the shutdown
// channel is signalled. Each task's done channel is closed after it is
// handled so the submitting handler can unblock.
func (pool *WorkerPool) Start() {
	// Launch the worker goroutines.
	for i := 0; i < pool.workerCount; i++ {
		pool.wg.Add(1)
		go func(workerID int) {
			defer pool.wg.Done()

			logInfo("Worker %d 已启动", workerID)

			for {
				select {
				case task, ok := <-pool.taskQueue:
					if !ok {
						// Queue closed: exit this worker.
						logInfo("Worker %d 收到队列关闭信号,准备退出", workerID)
						return
					}

					logDebug("Worker %d 处理任务 reqID:%s", workerID, task.reqID)

					// Dispatch by response mode; errors are logged, not
					// returned, since the handler owns the HTTP response.
					if task.isStream {
						err := handleStreamingRequest(task.w, task.r, task.hunyuanReq, task.reqID)
						if err != nil {
							logError("Worker %d 处理流式任务失败: %v", workerID, err)
						}
					} else {
						err := handleNonStreamingRequest(task.w, task.r, task.hunyuanReq, task.reqID)
						if err != nil {
							logError("Worker %d 处理非流式任务失败: %v", workerID, err)
						}
					}

					// Signal completion to the submitting handler.
					close(task.done)

				case <-pool.shutdownChannel:
					// Shutdown requested: exit this worker.
					logInfo("Worker %d 收到关闭信号,准备退出", workerID)
					return
				}
			}
		}(i)
	}
}
99
+
100
// SubmitTask tries to enqueue a task without blocking. It returns
// (true, nil) on success and (false, err) when the queue is full so
// the caller can reject the request with a 503.
func (pool *WorkerPool) SubmitTask(task *Task) (bool, error) {
	select {
	case pool.taskQueue <- task:
		// Task accepted into the queue.
		return true, nil
	default:
		// Queue is at capacity.
		return false, fmt.Errorf("任务队列已满")
	}
}
111
+
112
// Shutdown stops the pool: it signals all workers to exit, waits for
// them, then closes the task queue.
//
// NOTE(review): workers exiting via shutdownChannel abandon any tasks
// still sitting in taskQueue, and those tasks' done channels are never
// closed — confirm no handler can still be blocked on task.done when
// Shutdown runs (in main it is called after server.Shutdown, which
// should have drained in-flight handlers).
func (pool *WorkerPool) Shutdown() {
	logInfo("正在关闭工作池...")

	// Tell every worker to stop.
	close(pool.shutdownChannel)

	// Wait for all workers to finish.
	pool.wg.Wait()

	// Close the queue; no SubmitTask may run after this point, since
	// sending on a closed channel would panic.
	close(pool.taskQueue)

	logInfo("工作池已关闭")
}
127
+
128
// Semaphore is a counting semaphore built on a buffered channel; the
// channel's capacity is the maximum number of concurrently held permits.
type Semaphore struct {
	slots chan struct{}
}

// NewSemaphore returns a semaphore allowing up to size concurrent holders.
func NewSemaphore(size int) *Semaphore {
	s := &Semaphore{}
	s.slots = make(chan struct{}, size)
	return s
}

// Acquire blocks until a permit is available, then takes it.
func (s *Semaphore) Acquire() {
	var token struct{}
	s.slots <- token
}

// Release returns a previously acquired permit.
func (s *Semaphore) Release() {
	<-s.slots
}

// TryAcquire takes a permit if one is free and reports whether it
// succeeded; it never blocks.
func (s *Semaphore) TryAcquire() bool {
	acquired := false
	select {
	case s.slots <- struct{}{}:
		acquired = true
	default:
	}
	return acquired
}
159
+
160
// Config holds all command-line configuration for the proxy process.
type Config struct {
	Port          string // listen port for the proxy server
	Address       string // listen address for the proxy server
	LogLevel      string // one of debug/info/warn/error
	DevMode       bool   // development mode; forces debug logging
	MaxRetries    int    // maximum retries for failed upstream requests
	Timeout       int    // request timeout in seconds
	VerifySSL     bool   // whether to verify upstream TLS certificates
	ModelName     string // default model when the client omits/misnames one
	BearerToken   string // bearer token for the Hunyuan API (a public default is shipped)
	WorkerCount   int    // number of workers in the pool
	QueueSize     int    // capacity of the task queue
	MaxConcurrent int    // cap on concurrent in-flight requests
}
175
+
176
// SupportedModels lists the Hunyuan model IDs this proxy will forward;
// anything else falls back to the configured default model.
var SupportedModels = []string{
	"hunyuan-t1-latest",
	"hunyuan-turbos-latest",
}
181
+
182
// Upstream Tencent Hunyuan endpoint and the proxy's own version string.
const (
	TargetURL = "https://llm.hunyuan.tencent.com/aide/api/v2/triton_image/demo_text_chat/"
	Version   = "1.0.0" // reported by the /health endpoint
)

// Log level names accepted by the --log-level flag.
const (
	LogLevelDebug = "debug"
	LogLevelInfo  = "info"
	LogLevelWarn  = "warn"
	LogLevelError = "error"
)
195
+
196
// parseFlags parses command-line flags into a Config. When --dev is
// set, the log level is forced to debug regardless of --log-level.
func parseFlags() *Config {
	cfg := &Config{}
	flag.StringVar(&cfg.Port, "port", "6666", "Port to listen on")
	flag.StringVar(&cfg.Address, "address", "localhost", "Address to listen on")
	flag.StringVar(&cfg.LogLevel, "log-level", LogLevelInfo, "Log level (debug, info, warn, error)")
	flag.BoolVar(&cfg.DevMode, "dev", false, "Enable development mode with enhanced logging")
	flag.IntVar(&cfg.MaxRetries, "max-retries", 3, "Maximum number of retries for failed requests")
	flag.IntVar(&cfg.Timeout, "timeout", 300, "Request timeout in seconds")
	flag.BoolVar(&cfg.VerifySSL, "verify-ssl", true, "Verify SSL certificates")
	flag.StringVar(&cfg.ModelName, "model", "hunyuan-t1-latest", "Default Hunyuan model name")
	flag.StringVar(&cfg.BearerToken, "token", "7auGXNATFSKl7dF", "Bearer token for Hunyuan API")
	flag.IntVar(&cfg.WorkerCount, "workers", 50, "Number of worker goroutines in the pool")
	flag.IntVar(&cfg.QueueSize, "queue-size", 500, "Size of the task queue")
	flag.IntVar(&cfg.MaxConcurrent, "max-concurrent", 100, "Maximum number of concurrent requests")
	flag.Parse()

	// Development mode implies debug-level logging.
	if cfg.DevMode && cfg.LogLevel != LogLevelDebug {
		cfg.LogLevel = LogLevelDebug
		fmt.Println("开发模式已启用,日志级别设置为debug")
	}

	return cfg
}
221
+
222
// appConfig holds the parsed command-line configuration; set once in main.
var (
	appConfig *Config
)

// Atomically updated performance counters exposed via /health.
var (
	requestCounter   int64
	successCounter   int64
	errorCounter     int64
	avgResponseTime  int64     // running SUM of latencies in ms; /health divides by request count
	latencyHistogram [10]int64 // buckets of 100 ms: 0-100, 100-200, ..., >900
	queuedRequests   int64     // requests currently queued or in flight
	rejectedRequests int64     // requests turned away (queue full or concurrency cap)
)

// Concurrency-control singletons, created in main.
var (
	workerPool *WorkerPool // pool of request-processing workers
	requestSem *Semaphore  // caps concurrent in-flight requests
)

// Process-wide logger state; logMutex serialises writes from many goroutines.
var (
	logger   *log.Logger
	logLevel string
	logMutex sync.Mutex
)
250
+
251
// initLogger creates the process logger writing to stdout and records
// the active log level.
func initLogger(level string) {
	logger = log.New(os.Stdout, "[HunyuanAPI] ", log.LstdFlags)
	logLevel = level
}

// logDebug logs only at debug level.
func logDebug(format string, v ...interface{}) {
	if logLevel == LogLevelDebug {
		logMutex.Lock()
		logger.Printf("[DEBUG] "+format, v...)
		logMutex.Unlock()
	}
}

// logInfo logs at debug and info levels.
func logInfo(format string, v ...interface{}) {
	if logLevel == LogLevelDebug || logLevel == LogLevelInfo {
		logMutex.Lock()
		logger.Printf("[INFO] "+format, v...)
		logMutex.Unlock()
	}
}

// logWarn logs at debug, info, and warn levels.
func logWarn(format string, v ...interface{}) {
	if logLevel == LogLevelDebug || logLevel == LogLevelInfo || logLevel == LogLevelWarn {
		logMutex.Lock()
		logger.Printf("[WARN] "+format, v...)
		logMutex.Unlock()
	}
}

// logError always logs, and also bumps the global error counter.
func logError(format string, v ...interface{}) {
	logMutex.Lock()
	logger.Printf("[ERROR] "+format, v...)
	logMutex.Unlock()

	// Count the error for /health metrics.
	atomic.AddInt64(&errorCounter, 1)
}
290
+
291
// APIMessage is one chat message in the OpenAI/DeepSeek wire format.
type APIMessage struct {
	Role    string      `json:"role"`
	Content interface{} `json:"content"` // interface{} so string or structured content both decode
}

// APIRequest is the inbound OpenAI/DeepSeek chat-completion request body.
type APIRequest struct {
	Model       string       `json:"model"`
	Messages    []APIMessage `json:"messages"`
	Stream      bool         `json:"stream"`
	Temperature float64      `json:"temperature,omitempty"`
	MaxTokens   int          `json:"max_tokens,omitempty"`
}

// HunyuanRequest is the payload forwarded to the Tencent Hunyuan API.
type HunyuanRequest struct {
	Stream            bool         `json:"stream"`
	Model             string       `json:"model"`
	QueryID           string       `json:"query_id"`
	Messages          []APIMessage `json:"messages"`
	StreamModeration  bool         `json:"stream_moderation"`
	EnableEnhancement bool         `json:"enable_enhancement"`
}

// HunyuanResponse is one SSE chunk as emitted by the Hunyuan API.
type HunyuanResponse struct {
	ID                string   `json:"id"`
	Object            string   `json:"object"`
	Created           int64    `json:"created"`
	Model             string   `json:"model"`
	SystemFingerprint string   `json:"system_fingerprint"`
	Choices           []Choice `json:"choices"`
	Note              string   `json:"note,omitempty"`
}

// Choice is a single completion choice within a response chunk.
type Choice struct {
	Index        int     `json:"index"`
	Delta        Delta   `json:"delta"`
	FinishReason *string `json:"finish_reason"` // pointer: absent vs empty are distinct
}

// Delta carries the incremental content (and optional reasoning text)
// of a streaming chunk.
type Delta struct {
	Role             string `json:"role,omitempty"`
	Content          string `json:"content,omitempty"`
	ReasoningContent string `json:"reasoning_content,omitempty"`
}

// StreamChunk is the OpenAI/DeepSeek-style SSE chunk sent to clients.
type StreamChunk struct {
	ID      string `json:"id"`
	Object  string `json:"object"`
	Created int64  `json:"created"`
	Model   string `json:"model"`
	Choices []struct {
		Index        int     `json:"index"`
		FinishReason *string `json:"finish_reason,omitempty"`
		Delta        struct {
			Role             string `json:"role,omitempty"`
			Content          string `json:"content,omitempty"`
			ReasoningContent string `json:"reasoning_content,omitempty"`
		} `json:"delta"`
	} `json:"choices"`
}

// CompletionResponse is the OpenAI-style non-streaming response body.
type CompletionResponse struct {
	ID      string `json:"id"`
	Object  string `json:"object"`
	Created int64  `json:"created"`
	Model   string `json:"model"`
	Choices []struct {
		Index        int    `json:"index"`
		FinishReason string `json:"finish_reason"`
		Message      struct {
			Role             string `json:"role"`
			Content          string `json:"content"`
			ReasoningContent string `json:"reasoning_content,omitempty"`
		} `json:"message"`
	} `json:"choices"`
	Usage struct {
		PromptTokens     int `json:"prompt_tokens"`
		CompletionTokens int `json:"completion_tokens"`
		TotalTokens      int `json:"total_tokens"`
	} `json:"usage"`
}
379
+
380
// requestCount numbers incoming chat requests for log correlation;
// countMutex guards it (kept separate from the atomic counters above).
var (
	requestCount uint64 = 0
	countMutex   sync.Mutex
)
385
+
386
// main wires everything together: parses flags, initialises logging,
// creates the worker pool and concurrency semaphore, registers the
// HTTP handlers (/v1/models, /v1/chat/completions, /health), and runs
// the server until SIGINT/SIGTERM, then shuts down gracefully.
func main() {
	// Parse configuration from command-line flags.
	appConfig = parseFlags()

	// Initialise logging at the configured level.
	initLogger(appConfig.LogLevel)

	logInfo("启动服务: TargetURL=%s, Address=%s, Port=%s, Version=%s, LogLevel=%s, 支持模型=%v, BearerToken=***, WorkerCount=%d, QueueSize=%d, MaxConcurrent=%d",
		TargetURL, appConfig.Address, appConfig.Port, Version, appConfig.LogLevel, SupportedModels,
		appConfig.WorkerCount, appConfig.QueueSize, appConfig.MaxConcurrent)

	// Create the worker pool and the concurrency-limiting semaphore.
	workerPool = NewWorkerPool(appConfig.WorkerCount, appConfig.QueueSize)
	requestSem = NewSemaphore(appConfig.MaxConcurrent)

	logInfo("工作池已创建: %d个worker, 队列大小为%d", appConfig.WorkerCount, appConfig.QueueSize)

	// Tune the default transport for higher outbound concurrency.
	http.DefaultTransport.(*http.Transport).MaxIdleConnsPerHost = 100
	http.DefaultTransport.(*http.Transport).MaxIdleConns = 100
	http.DefaultTransport.(*http.Transport).IdleConnTimeout = 90 * time.Second

	// Custom server with explicit timeouts for higher concurrency.
	server := &http.Server{
		Addr:         appConfig.Address + ":" + appConfig.Port,
		ReadTimeout:  time.Duration(appConfig.Timeout) * time.Second,
		WriteTimeout: time.Duration(appConfig.Timeout) * time.Second,
		IdleTimeout:  120 * time.Second,
		Handler:      nil, // default ServeMux
	}

	// Model-listing endpoint.
	http.HandleFunc("/v1/models", func(w http.ResponseWriter, r *http.Request) {
		setCORSHeaders(w)
		if r.Method == "OPTIONS" {
			w.WriteHeader(http.StatusOK)
			return
		}
		handleModelsRequest(w, r)
	})

	// Chat-completion endpoint, gated by the concurrency semaphore.
	http.HandleFunc("/v1/chat/completions", func(w http.ResponseWriter, r *http.Request) {
		setCORSHeaders(w)
		if r.Method == "OPTIONS" {
			w.WriteHeader(http.StatusOK)
			return
		}

		// Assign this request a sequential number for the logs.
		countMutex.Lock()
		requestCount++
		currentCount := requestCount
		countMutex.Unlock()

		logInfo("收到新请求 #%d", currentCount)

		// Metric: total requests seen.
		atomic.AddInt64(&requestCounter, 1)

		// Reject immediately when the concurrency cap is reached.
		if !requestSem.TryAcquire() {
			atomic.AddInt64(&rejectedRequests, 1)
			logWarn("请求 #%d 被拒绝: 当前并发请求数已达上限", currentCount)
			w.Header().Set("Retry-After", "30")
			http.Error(w, "Server is busy, please try again later", http.StatusServiceUnavailable)
			return
		}

		// Release the slot when this handler returns.
		defer requestSem.Release()

		// Hand the request to the worker-pool pipeline.
		handleChatCompletionRequestWithPool(w, r, currentCount)
	})

	// Health/metrics endpoint.
	http.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
		setCORSHeaders(w)
		if r.Method == "OPTIONS" {
			w.WriteHeader(http.StatusOK)
			return
		}

		// Snapshot the atomic counters.
		reqCount := atomic.LoadInt64(&requestCounter)
		succCount := atomic.LoadInt64(&successCounter)
		errCount := atomic.LoadInt64(&errorCounter)
		queuedCount := atomic.LoadInt64(&queuedRequests)
		rejectedCount := atomic.LoadInt64(&rejectedRequests)

		// avgResponseTime stores a latency SUM; divide to report the mean.
		var avgTime int64 = 0
		if reqCount > 0 {
			avgTime = atomic.LoadInt64(&avgResponseTime) / max(reqCount, 1)
		}

		// Copy the latency histogram buckets.
		histogram := make([]int64, 10)
		for i := 0; i < 10; i++ {
			histogram[i] = atomic.LoadInt64(&latencyHistogram[i])
		}

		// Assemble the JSON payload.
		stats := map[string]interface{}{
			"status":           "ok",
			"version":          Version,
			"requests":         reqCount,
			"success":          succCount,
			"errors":           errCount,
			"queued":           queuedCount,
			"rejected":         rejectedCount,
			"avg_time_ms":      avgTime,
			"histogram_ms":     histogram,
			"worker_count":     workerPool.workerCount,
			"queue_size":       len(workerPool.taskQueue),
			"queue_capacity":   cap(workerPool.taskQueue),
			"queue_percent":    float64(len(workerPool.taskQueue)) / float64(cap(workerPool.taskQueue)) * 100,
			"concurrent_limit": appConfig.MaxConcurrent,
		}

		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusOK)
		json.NewEncoder(w).Encode(stats)
	})

	// Wire up signal handling for graceful shutdown.
	stop := make(chan os.Signal, 1)
	signal.Notify(stop, os.Interrupt, syscall.SIGTERM)

	// Run the server in the background.
	go func() {
		logInfo("Starting proxy server on %s", server.Addr)
		if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
			logError("Failed to start server: %v", err)
			os.Exit(1)
		}
	}()

	// Block until a stop signal arrives.
	<-stop

	// Allow up to 30s for in-flight requests to drain.
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	// Gracefully shut down the HTTP server first...
	logInfo("Server is shutting down...")
	if err := server.Shutdown(ctx); err != nil {
		logError("Server shutdown failed: %v", err)
	}

	// ...then stop the worker pool.
	workerPool.Shutdown()

	logInfo("Server gracefully stopped")
}
544
+
545
+ // 设置CORS头
546
+ func setCORSHeaders(w http.ResponseWriter) {
547
+ w.Header().Set("Access-Control-Allow-Origin", "*")
548
+ w.Header().Set("Access-Control-Allow-Methods", "POST, GET, OPTIONS")
549
+ w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization")
550
+ }
551
+
552
+ // 验证消息格式
553
+ func validateMessages(messages []APIMessage) (bool, string) {
554
+ reqID := generateRequestID()
555
+ logDebug("[reqID:%s] 验证消息格式", reqID)
556
+
557
+ if messages == nil || len(messages) == 0 {
558
+ return false, "Messages array is required"
559
+ }
560
+
561
+ for _, msg := range messages {
562
+ if msg.Role == "" || msg.Content == nil {
563
+ return false, "Invalid message format: each message must have role and content"
564
+ }
565
+ }
566
+
567
+ return true, ""
568
+ }
569
+
570
+ // 从请求头中提取令牌
571
+ func extractToken(r *http.Request) (string, error) {
572
+ // 获取 Authorization 头部
573
+ authHeader := r.Header.Get("Authorization")
574
+ if authHeader == "" {
575
+ return "", fmt.Errorf("missing Authorization header")
576
+ }
577
+
578
+ // 验证格式并提取令牌
579
+ if !strings.HasPrefix(authHeader, "Bearer ") {
580
+ return "", fmt.Errorf("invalid Authorization header format, must start with 'Bearer '")
581
+ }
582
+
583
+ // 提取令牌值
584
+ token := strings.TrimPrefix(authHeader, "Bearer ")
585
+ if token == "" {
586
+ return "", fmt.Errorf("empty token in Authorization header")
587
+ }
588
+
589
+ return token, nil
590
+ }
591
+
592
+ // 转换任意类型的内容为字符串
593
+ func contentToString(content interface{}) string {
594
+ if content == nil {
595
+ return ""
596
+ }
597
+
598
+ switch v := content.(type) {
599
+ case string:
600
+ return v
601
+ default:
602
+ jsonBytes, err := json.Marshal(v)
603
+ if err != nil {
604
+ logWarn("将内容转换为JSON失败: %v", err)
605
+ return ""
606
+ }
607
+ return string(jsonBytes)
608
+ }
609
+ }
610
+
611
// generateQueryID builds a unique upstream query ID from a random
// 8-character prefix plus the current nanosecond timestamp.
func generateQueryID() string {
	return fmt.Sprintf("%s%d", getRandomString(8), time.Now().UnixNano())
}
615
+
616
+ // 判断模型是否在支持列表中
617
+ func isModelSupported(modelName string) bool {
618
+ for _, supportedModel := range SupportedModels {
619
+ if modelName == supportedModel {
620
+ return true
621
+ }
622
+ }
623
+ return false
624
+ }
625
+
626
// handleModelsRequest serves /v1/models with an OpenAI-style model
// list built from SupportedModels.
func handleModelsRequest(w http.ResponseWriter, r *http.Request) {
	logInfo("处理模型列表请求")

	// Respond as JSON.
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusOK)

	// One entry per supported model.
	modelData := make([]map[string]interface{}, 0, len(SupportedModels))
	for _, model := range SupportedModels {
		modelData = append(modelData, map[string]interface{}{
			"id":       model,
			"object":   "model",
			"created":  time.Now().Unix(), // creation time is synthesised, not from upstream
			"owned_by": "TencentCloud",
			"capabilities": map[string]interface{}{
				"chat":        true,
				"completions": true,
				"reasoning":   true,
			},
		})
	}

	modelsList := map[string]interface{}{
		"object": "list",
		"data":   modelData,
	}

	json.NewEncoder(w).Encode(modelsList)
}
657
+
658
// handleChatCompletionRequestWithPool serves one /v1/chat/completions
// request by validating it, packaging it as a Task, submitting it to
// the worker pool, and blocking until the worker finishes or the
// request context ends. requestNum is the sequential request number
// used for log correlation.
func handleChatCompletionRequestWithPool(w http.ResponseWriter, r *http.Request, requestNum uint64) {
	reqID := generateRequestID()
	startTime := time.Now()
	logInfo("[reqID:%s] 处理聊天补全请求 #%d", reqID, requestNum)

	// Bound the whole request by the configured timeout.
	ctx, cancel := context.WithTimeout(r.Context(), time.Duration(appConfig.Timeout)*time.Second)
	defer cancel()

	// Carry the timeout context on the request handed to the worker.
	r = r.WithContext(ctx)

	// Recover from panics so one bad request cannot kill the process.
	defer func() {
		if r := recover(); r != nil {
			logError("[reqID:%s] 处理请求时发生panic: %v", reqID, r)
			http.Error(w, "Internal server error", http.StatusInternalServerError)
		}
	}()

	// Decode the OpenAI-style request body.
	var apiReq APIRequest
	if err := json.NewDecoder(r.Body).Decode(&apiReq); err != nil {
		logError("[reqID:%s] 解析请求失败: %v", reqID, err)
		http.Error(w, "Invalid request body", http.StatusBadRequest)
		return
	}

	// Reject structurally invalid message lists up front.
	valid, errMsg := validateMessages(apiReq.Messages)
	if !valid {
		logError("[reqID:%s] 消息格式验证失败: %s", reqID, errMsg)
		http.Error(w, errMsg, http.StatusBadRequest)
		return
	}

	// Whether the client asked for a streaming (SSE) response.
	isStream := apiReq.Stream

	// Pick the model: honour the request if supported, else fall back
	// to the configured default.
	modelName := appConfig.ModelName
	if apiReq.Model != "" {
		if isModelSupported(apiReq.Model) {
			modelName = apiReq.Model
		} else {
			logWarn("[reqID:%s] 请求的模型 %s 不支持,使用默认模型 %s", reqID, apiReq.Model, modelName)
		}
	}

	logInfo("[reqID:%s] 使用模型: %s", reqID, modelName)

	// Build the upstream Hunyuan request.
	hunyuanReq := HunyuanRequest{
		Stream:            true, // Hunyuan always streams; non-stream clients are adapted downstream
		Model:             modelName,
		QueryID:           generateQueryID(),
		Messages:          apiReq.Messages,
		StreamModeration:  true,
		EnableEnhancement: false,
	}

	// Bundle everything into a pool task.
	task := &Task{
		r:          r,
		w:          w,
		done:       make(chan struct{}),
		reqID:      reqID,
		isStream:   isStream,
		hunyuanReq: hunyuanReq,
	}

	// Enqueue; reject with 503 when the queue is full.
	atomic.AddInt64(&queuedRequests, 1)
	submitted, err := workerPool.SubmitTask(task)
	if !submitted {
		atomic.AddInt64(&queuedRequests, -1)
		atomic.AddInt64(&rejectedRequests, 1)
		logError("[reqID:%s] 提交任务失败: %v", reqID, err)
		w.Header().Set("Retry-After", "60")
		http.Error(w, "Server queue is full, please try again later", http.StatusServiceUnavailable)
		return
	}

	logInfo("[reqID:%s] 任务已提交到队列", reqID)

	// Wait for the worker to finish or for cancellation/timeout.
	select {
	case <-task.done:
		// Worker finished the task.
		logInfo("[reqID:%s] 任务已完成", reqID)
	case <-r.Context().Done():
		// Request cancelled or timed out.
		logWarn("[reqID:%s] 请求被取消或超时", reqID)
		// NOTE(review): returning here leaves the worker possibly still
		// writing to w after this handler exits — confirm this cannot
		// race with the server reusing the connection.
	}

	// Update metrics for this request.
	atomic.AddInt64(&queuedRequests, -1)
	elapsed := time.Since(startTime).Milliseconds()

	// Latency histogram: 100 ms per bucket, capped at the last bucket.
	bucketIndex := min(int(elapsed/100), 9)
	atomic.AddInt64(&latencyHistogram[bucketIndex], 1)

	// avgResponseTime accumulates a latency SUM; /health divides it.
	atomic.AddInt64(&avgResponseTime, elapsed)

	if r.Context().Err() == nil {
		// Completed within the deadline: count as success.
		atomic.AddInt64(&successCounter, 1)
		logInfo("[reqID:%s] 请求处理成功,耗时: %dms", reqID, elapsed)
	} else {
		logError("[reqID:%s] 请求处理失败: %v, 耗时: %dms", reqID, r.Context().Err(), elapsed)
	}
}
775
+
776
// handleChatCompletionRequest is the original, pool-less request path:
// it validates the request and calls the streaming/non-streaming
// handler inline on the serving goroutine.
//
// Deprecated: superseded by handleChatCompletionRequestWithPool; no
// route registers this function anymore (kept for reference).
func handleChatCompletionRequest(w http.ResponseWriter, r *http.Request) {
	reqID := generateRequestID()
	startTime := time.Now()
	logInfo("[reqID:%s] 处理聊天补全请求", reqID)

	// Decode the OpenAI-style request body.
	var apiReq APIRequest
	if err := json.NewDecoder(r.Body).Decode(&apiReq); err != nil {
		logError("[reqID:%s] 解析请求失败: %v", reqID, err)
		http.Error(w, "Invalid request body", http.StatusBadRequest)
		return
	}

	// Reject structurally invalid message lists up front.
	valid, errMsg := validateMessages(apiReq.Messages)
	if !valid {
		logError("[reqID:%s] 消息格式验证失败: %s", reqID, errMsg)
		http.Error(w, errMsg, http.StatusBadRequest)
		return
	}

	// Whether the client asked for a streaming (SSE) response.
	isStream := apiReq.Stream

	// Pick the model: honour the request if supported, else fall back
	// to the configured default.
	modelName := appConfig.ModelName
	if apiReq.Model != "" {
		if isModelSupported(apiReq.Model) {
			modelName = apiReq.Model
		} else {
			logWarn("[reqID:%s] 请求的模型 %s 不支持,使用默认模型 %s", reqID, apiReq.Model, modelName)
		}
	}

	logInfo("[reqID:%s] 使用模型: %s", reqID, modelName)

	// Build the upstream Hunyuan request.
	hunyuanReq := HunyuanRequest{
		Stream:            true, // Hunyuan always streams; non-stream clients are adapted downstream
		Model:             modelName,
		QueryID:           generateQueryID(),
		Messages:          apiReq.Messages,
		StreamModeration:  true,
		EnableEnhancement: false,
	}

	// Forward to the appropriate handler inline.
	var responseErr error
	if isStream {
		responseErr = handleStreamingRequest(w, r, hunyuanReq, reqID)
	} else {
		responseErr = handleNonStreamingRequest(w, r, hunyuanReq, reqID)
	}

	// Update metrics for this request.
	elapsed := time.Since(startTime).Milliseconds()

	// Latency histogram: 100 ms per bucket, capped at the last bucket.
	bucketIndex := min(int(elapsed/100), 9)
	atomic.AddInt64(&latencyHistogram[bucketIndex], 1)

	// avgResponseTime accumulates a latency SUM; /health divides it.
	atomic.AddInt64(&avgResponseTime, elapsed)

	if responseErr == nil {
		// Completed successfully: count it.
		atomic.AddInt64(&successCounter, 1)
		logInfo("[reqID:%s] 请求处理成功,耗时: %dms", reqID, elapsed)
	} else {
		logError("[reqID:%s] 请求处理失败: %v, 耗时: %dms", reqID, responseErr, elapsed)
	}
}
850
+
851
+ // 安全的HTTP客户端,支持禁用SSL验证
852
+ func getHTTPClient() *http.Client {
853
+ tr := &http.Transport{
854
+ MaxIdleConnsPerHost: 100,
855
+ IdleConnTimeout: 90 * time.Second,
856
+ TLSClientConfig: nil, // 默认配置
857
+ }
858
+
859
+ // 如果配置了禁用SSL验证
860
+ if !appConfig.VerifySSL {
861
+ tr.TLSClientConfig = &tls.Config{InsecureSkipVerify: true}
862
+ }
863
+
864
+ return &http.Client{
865
+ Timeout: time.Duration(appConfig.Timeout) * time.Second,
866
+ Transport: tr,
867
+ }
868
+ }
869
+
870
+ // 处理流式请求
871
+ func handleStreamingRequest(w http.ResponseWriter, r *http.Request, hunyuanReq HunyuanRequest, reqID string) error {
872
+ logInfo("[reqID:%s] 处理流式请求", reqID)
873
+
874
+ // 序列化请求
875
+ jsonData, err := json.Marshal(hunyuanReq)
876
+ if err != nil {
877
+ logError("[reqID:%s] 序列化请求失败: %v", reqID, err)
878
+ http.Error(w, "Internal server error", http.StatusInternalServerError)
879
+ return err
880
+ }
881
+
882
+ // 创建请求
883
+ httpReq, err := http.NewRequestWithContext(r.Context(), "POST", TargetURL, bytes.NewBuffer(jsonData))
884
+ if err != nil {
885
+ logError("[reqID:%s] 创建请求失败: %v", reqID, err)
886
+ http.Error(w, "Internal server error", http.StatusInternalServerError)
887
+ return err
888
+ }
889
+
890
+ // 设置请求头
891
+ httpReq.Header.Set("Content-Type", "application/json")
892
+ httpReq.Header.Set("Model", hunyuanReq.Model)
893
+ setCommonHeaders(httpReq)
894
+
895
+ // 创建HTTP客户端
896
+ client := getHTTPClient()
897
+
898
+ // 发送请求
899
+ resp, err := client.Do(httpReq)
900
+ if err != nil {
901
+ logError("[reqID:%s] 发送请求失败: %v", reqID, err)
902
+ http.Error(w, "Failed to connect to API", http.StatusBadGateway)
903
+ return err
904
+ }
905
+ defer resp.Body.Close()
906
+
907
+ // 检查响应状态
908
+ if resp.StatusCode != http.StatusOK {
909
+ logError("[reqID:%s] API返回非200状态码: %d", reqID, resp.StatusCode)
910
+
911
+ bodyBytes, _ := io.ReadAll(resp.Body)
912
+ logError("[reqID:%s] 错误响应内容: %s", reqID, string(bodyBytes))
913
+
914
+ http.Error(w, fmt.Sprintf("API error with status code: %d", resp.StatusCode), resp.StatusCode)
915
+ return fmt.Errorf("API返回非200状态码: %d", resp.StatusCode)
916
+ }
917
+
918
+ // 设置响应头
919
+ w.Header().Set("Content-Type", "text/event-stream")
920
+ w.Header().Set("Cache-Control", "no-cache")
921
+ w.Header().Set("Connection", "keep-alive")
922
+
923
+ // 创建响应ID和时间戳
924
+ respID := fmt.Sprintf("chatcmpl-%s", getRandomString(10))
925
+ createdTime := time.Now().Unix()
926
+
927
+ // 创建读取器
928
+ reader := bufio.NewReaderSize(resp.Body, 16384)
929
+
930
+ // 创建Flusher
931
+ flusher, ok := w.(http.Flusher)
932
+ if !ok {
933
+ logError("[reqID:%s] Streaming not supported", reqID)
934
+ http.Error(w, "Streaming not supported", http.StatusInternalServerError)
935
+ return fmt.Errorf("streaming not supported")
936
+ }
937
+
938
+ // 发送角色块
939
+ roleChunk := createRoleChunk(respID, createdTime, hunyuanReq.Model)
940
+ w.Write([]byte("data: " + string(roleChunk) + "\n\n"))
941
+ flusher.Flush()
942
+
943
+ // 持续读取响应
944
+ for {
945
+ // 添加超时检测
946
+ select {
947
+ case <-r.Context().Done():
948
+ logWarn("[reqID:%s] 请求超时或被客户端取消", reqID)
949
+ return fmt.Errorf("请求超时或被取消")
950
+ default:
951
+ // 继续处理
952
+ }
953
+
954
+ // 读取一行数据
955
+ line, err := reader.ReadBytes('\n')
956
+ if err != nil {
957
+ if err != io.EOF {
958
+ logError("[reqID:%s] 读取响应出错: %v", reqID, err)
959
+ return err
960
+ }
961
+ break
962
+ }
963
+
964
+ // 处理数据行
965
+ lineStr := string(line)
966
+ if strings.HasPrefix(lineStr, "data: ") {
967
+ jsonStr := strings.TrimPrefix(lineStr, "data: ")
968
+ jsonStr = strings.TrimSpace(jsonStr)
969
+
970
+ // 特殊处理[DONE]消息
971
+ if jsonStr == "[DONE]" {
972
+ logDebug("[reqID:%s] 收到[DONE]消息", reqID)
973
+ w.Write([]byte("data: [DONE]\n\n"))
974
+ flusher.Flush()
975
+ break
976
+ }
977
+
978
+ // 解析混元响应
979
+ var hunyuanResp HunyuanResponse
980
+ if err := json.Unmarshal([]byte(jsonStr), &hunyuanResp); err != nil {
981
+ logWarn("[reqID:%s] 解析JSON失败: %v, data: %s", reqID, err, jsonStr)
982
+ continue
983
+ }
984
+
985
+ // 处理各种类型的内容
986
+ for _, choice := range hunyuanResp.Choices {
987
+ if choice.Delta.Content != "" {
988
+ // 发送内容块
989
+ contentChunk := createContentChunk(respID, createdTime, hunyuanReq.Model, choice.Delta.Content)
990
+ w.Write([]byte("data: " + string(contentChunk) + "\n\n"))
991
+ flusher.Flush()
992
+ }
993
+
994
+ if choice.Delta.ReasoningContent != "" {
995
+ // 发送推理内容块
996
+ reasoningChunk := createReasoningChunk(respID, createdTime, hunyuanReq.Model, choice.Delta.ReasoningContent)
997
+ w.Write([]byte("data: " + string(reasoningChunk) + "\n\n"))
998
+ flusher.Flush()
999
+ }
1000
+
1001
+ // 处理完成标志
1002
+ if choice.FinishReason != nil {
1003
+ finishReason := *choice.FinishReason
1004
+ if finishReason != "" {
1005
+ doneChunk := createDoneChunk(respID, createdTime, hunyuanReq.Model, finishReason)
1006
+ w.Write([]byte("data: " + string(doneChunk) + "\n\n"))
1007
+ flusher.Flush()
1008
+ }
1009
+ }
1010
+ }
1011
+ }
1012
+ }
1013
+
1014
+ // 发送结束信号(如果没有正常结束)
1015
+ finishReason := "stop"
1016
+ doneChunk := createDoneChunk(respID, createdTime, hunyuanReq.Model, finishReason)
1017
+ w.Write([]byte("data: " + string(doneChunk) + "\n\n"))
1018
+ w.Write([]byte("data: [DONE]\n\n"))
1019
+ flusher.Flush()
1020
+
1021
+ return nil
1022
+ }
1023
+
1024
+ // 处理非��式请求
1025
+ func handleNonStreamingRequest(w http.ResponseWriter, r *http.Request, hunyuanReq HunyuanRequest, reqID string) error {
1026
+ logInfo("[reqID:%s] 处理非流式请求", reqID)
1027
+
1028
+ // 序列化请求
1029
+ jsonData, err := json.Marshal(hunyuanReq)
1030
+ if err != nil {
1031
+ logError("[reqID:%s] 序列化请求失败: %v", reqID, err)
1032
+ http.Error(w, "Internal server error", http.StatusInternalServerError)
1033
+ return err
1034
+ }
1035
+
1036
+ // 创建请求
1037
+ httpReq, err := http.NewRequestWithContext(r.Context(), "POST", TargetURL, bytes.NewBuffer(jsonData))
1038
+ if err != nil {
1039
+ logError("[reqID:%s] 创建请求失败: %v", reqID, err)
1040
+ http.Error(w, "Internal server error", http.StatusInternalServerError)
1041
+ return err
1042
+ }
1043
+
1044
+ // 设置请求头
1045
+ httpReq.Header.Set("Content-Type", "application/json")
1046
+ httpReq.Header.Set("Model", hunyuanReq.Model)
1047
+ setCommonHeaders(httpReq)
1048
+
1049
+ // 创建HTTP客户端
1050
+ client := getHTTPClient()
1051
+
1052
+ // 发送请求
1053
+ resp, err := client.Do(httpReq)
1054
+ if err != nil {
1055
+ logError("[reqID:%s] 发送请求失败: %v", reqID, err)
1056
+ http.Error(w, "Failed to connect to API", http.StatusBadGateway)
1057
+ return err
1058
+ }
1059
+ defer resp.Body.Close()
1060
+
1061
+ // 检查响应状态
1062
+ if resp.StatusCode != http.StatusOK {
1063
+ logError("[reqID:%s] API返回非200状态码: %d", reqID, resp.StatusCode)
1064
+
1065
+ bodyBytes, _ := io.ReadAll(resp.Body)
1066
+ logError("[reqID:%s] 错误响应内容: %s", reqID, string(bodyBytes))
1067
+
1068
+ http.Error(w, fmt.Sprintf("API error with status code: %d", resp.StatusCode), resp.StatusCode)
1069
+ return fmt.Errorf("API返回非200状态码: %d", resp.StatusCode)
1070
+ }
1071
+
1072
+ // 读取完整的流式响应
1073
+ bodyBytes, err := io.ReadAll(resp.Body)
1074
+ if err != nil {
1075
+ logError("[reqID:%s] 读取响应失败: %v", reqID, err)
1076
+ http.Error(w, "Failed to read API response", http.StatusInternalServerError)
1077
+ return err
1078
+ }
1079
+
1080
+ // 解析流式响应并提取完整内容
1081
+ fullContent, reasoningContent, err := extractFullContentFromStream(bodyBytes, reqID)
1082
+ if err != nil {
1083
+ logError("[reqID:%s] 解析流式响应失败: %v", reqID, err)
1084
+ http.Error(w, "Failed to parse streaming response", http.StatusInternalServerError)
1085
+ return err
1086
+ }
1087
+
1088
+ // 构建完整的非流式响应
1089
+ completionResponse := CompletionResponse{
1090
+ ID: fmt.Sprintf("chatcmpl-%s", getRandomString(10)),
1091
+ Object: "chat.completion",
1092
+ Created: time.Now().Unix(),
1093
+ Model: hunyuanReq.Model,
1094
+ Choices: []struct {
1095
+ Index int `json:"index"`
1096
+ FinishReason string `json:"finish_reason"`
1097
+ Message struct {
1098
+ Role string `json:"role"`
1099
+ Content string `json:"content"`
1100
+ ReasoningContent string `json:"reasoning_content,omitempty"`
1101
+ } `json:"message"`
1102
+ }{
1103
+ {
1104
+ Index: 0,
1105
+ FinishReason: "stop",
1106
+ Message: struct {
1107
+ Role string `json:"role"`
1108
+ Content string `json:"content"`
1109
+ ReasoningContent string `json:"reasoning_content,omitempty"`
1110
+ }{
1111
+ Role: "assistant",
1112
+ Content: fullContent,
1113
+ ReasoningContent: reasoningContent,
1114
+ },
1115
+ },
1116
+ },
1117
+ Usage: struct {
1118
+ PromptTokens int `json:"prompt_tokens"`
1119
+ CompletionTokens int `json:"completion_tokens"`
1120
+ TotalTokens int `json:"total_tokens"`
1121
+ }{
1122
+ PromptTokens: len(formatMessages(hunyuanReq.Messages)) / 4,
1123
+ CompletionTokens: len(fullContent) / 4,
1124
+ TotalTokens: (len(formatMessages(hunyuanReq.Messages)) + len(fullContent)) / 4,
1125
+ },
1126
+ }
1127
+
1128
+ // 返回响应
1129
+ w.Header().Set("Content-Type", "application/json")
1130
+ if err := json.NewEncoder(w).Encode(completionResponse); err != nil {
1131
+ logError("[reqID:%s] 编码响应失败: %v", reqID, err)
1132
+ http.Error(w, "Failed to encode response", http.StatusInternalServerError)
1133
+ return err
1134
+ }
1135
+
1136
+ return nil
1137
+ }
1138
+
1139
+ // 从流式响应中提取完整内容
1140
+ func extractFullContentFromStream(bodyBytes []byte, reqID string) (string, string, error) {
1141
+ bodyStr := string(bodyBytes)
1142
+ lines := strings.Split(bodyStr, "\n")
1143
+
1144
+ // 内容累积器
1145
+ var contentBuilder strings.Builder
1146
+ var reasoningBuilder strings.Builder
1147
+
1148
+ // 解析每一行
1149
+ for _, line := range lines {
1150
+ if strings.HasPrefix(line, "data: ") && !strings.Contains(line, "[DONE]") {
1151
+ jsonStr := strings.TrimPrefix(line, "data: ")
1152
+ jsonStr = strings.TrimSpace(jsonStr)
1153
+
1154
+ // 解析JSON
1155
+ var hunyuanResp HunyuanResponse
1156
+ if err := json.Unmarshal([]byte(jsonStr), &hunyuanResp); err != nil {
1157
+ continue // 跳过无效JSON
1158
+ }
1159
+
1160
+ // 提取内容和推理内容
1161
+ for _, choice := range hunyuanResp.Choices {
1162
+ if choice.Delta.Content != "" {
1163
+ contentBuilder.WriteString(choice.Delta.Content)
1164
+ }
1165
+ if choice.Delta.ReasoningContent != "" {
1166
+ reasoningBuilder.WriteString(choice.Delta.ReasoningContent)
1167
+ }
1168
+ }
1169
+ }
1170
+ }
1171
+
1172
+ return contentBuilder.String(), reasoningBuilder.String(), nil
1173
+ }
1174
+
1175
+ // 创建角色块
1176
+ func createRoleChunk(id string, created int64, model string) []byte {
1177
+ chunk := StreamChunk{
1178
+ ID: id,
1179
+ Object: "chat.completion.chunk",
1180
+ Created: created,
1181
+ Model: model,
1182
+ Choices: []struct {
1183
+ Index int `json:"index"`
1184
+ FinishReason *string `json:"finish_reason,omitempty"`
1185
+ Delta struct {
1186
+ Role string `json:"role,omitempty"`
1187
+ Content string `json:"content,omitempty"`
1188
+ ReasoningContent string `json:"reasoning_content,omitempty"`
1189
+ } `json:"delta"`
1190
+ }{
1191
+ {
1192
+ Index: 0,
1193
+ Delta: struct {
1194
+ Role string `json:"role,omitempty"`
1195
+ Content string `json:"content,omitempty"`
1196
+ ReasoningContent string `json:"reasoning_content,omitempty"`
1197
+ }{
1198
+ Role: "assistant",
1199
+ },
1200
+ },
1201
+ },
1202
+ }
1203
+
1204
+ data, _ := json.Marshal(chunk)
1205
+ return data
1206
+ }
1207
+
1208
+ // 创建内容块
1209
+ func createContentChunk(id string, created int64, model string, content string) []byte {
1210
+ chunk := StreamChunk{
1211
+ ID: id,
1212
+ Object: "chat.completion.chunk",
1213
+ Created: created,
1214
+ Model: model,
1215
+ Choices: []struct {
1216
+ Index int `json:"index"`
1217
+ FinishReason *string `json:"finish_reason,omitempty"`
1218
+ Delta struct {
1219
+ Role string `json:"role,omitempty"`
1220
+ Content string `json:"content,omitempty"`
1221
+ ReasoningContent string `json:"reasoning_content,omitempty"`
1222
+ } `json:"delta"`
1223
+ }{
1224
+ {
1225
+ Index: 0,
1226
+ Delta: struct {
1227
+ Role string `json:"role,omitempty"`
1228
+ Content string `json:"content,omitempty"`
1229
+ ReasoningContent string `json:"reasoning_content,omitempty"`
1230
+ }{
1231
+ Content: content,
1232
+ },
1233
+ },
1234
+ },
1235
+ }
1236
+
1237
+ data, _ := json.Marshal(chunk)
1238
+ return data
1239
+ }
1240
+
1241
+ // 创建推理内容块
1242
+ func createReasoningChunk(id string, created int64, model string, reasoningContent string) []byte {
1243
+ chunk := StreamChunk{
1244
+ ID: id,
1245
+ Object: "chat.completion.chunk",
1246
+ Created: created,
1247
+ Model: model,
1248
+ Choices: []struct {
1249
+ Index int `json:"index"`
1250
+ FinishReason *string `json:"finish_reason,omitempty"`
1251
+ Delta struct {
1252
+ Role string `json:"role,omitempty"`
1253
+ Content string `json:"content,omitempty"`
1254
+ ReasoningContent string `json:"reasoning_content,omitempty"`
1255
+ } `json:"delta"`
1256
+ }{
1257
+ {
1258
+ Index: 0,
1259
+ Delta: struct {
1260
+ Role string `json:"role,omitempty"`
1261
+ Content string `json:"content,omitempty"`
1262
+ ReasoningContent string `json:"reasoning_content,omitempty"`
1263
+ }{
1264
+ ReasoningContent: reasoningContent,
1265
+ },
1266
+ },
1267
+ },
1268
+ }
1269
+
1270
+ data, _ := json.Marshal(chunk)
1271
+ return data
1272
+ }
1273
+
1274
+ // 创建完成块
1275
+ func createDoneChunk(id string, created int64, model string, reason string) []byte {
1276
+ finishReason := reason
1277
+ chunk := StreamChunk{
1278
+ ID: id,
1279
+ Object: "chat.completion.chunk",
1280
+ Created: created,
1281
+ Model: model,
1282
+ Choices: []struct {
1283
+ Index int `json:"index"`
1284
+ FinishReason *string `json:"finish_reason,omitempty"`
1285
+ Delta struct {
1286
+ Role string `json:"role,omitempty"`
1287
+ Content string `json:"content,omitempty"`
1288
+ ReasoningContent string `json:"reasoning_content,omitempty"`
1289
+ } `json:"delta"`
1290
+ }{
1291
+ {
1292
+ Index: 0,
1293
+ FinishReason: &finishReason,
1294
+ Delta: struct {
1295
+ Role string `json:"role,omitempty"`
1296
+ Content string `json:"content,omitempty"`
1297
+ ReasoningContent string `json:"reasoning_content,omitempty"`
1298
+ }{},
1299
+ },
1300
+ },
1301
+ }
1302
+
1303
+ data, _ := json.Marshal(chunk)
1304
+ return data
1305
+ }
1306
+
1307
+ // 设置常见的请求头 - 参考Python版本
1308
+ func setCommonHeaders(req *http.Request) {
1309
+ req.Header.Set("accept", "*/*")
1310
+ req.Header.Set("accept-language", "zh-CN,zh;q=0.9,en;q=0.8,zh-TW;q=0.7")
1311
+ req.Header.Set("authorization", "Bearer "+appConfig.BearerToken)
1312
+ req.Header.Set("dnt", "1")
1313
+ req.Header.Set("origin", "https://llm.hunyuan.tencent.com")
1314
+ req.Header.Set("polaris", "stream-server-online-sbs-10697")
1315
+ req.Header.Set("priority", "u=1, i")
1316
+ req.Header.Set("referer", "https://llm.hunyuan.tencent.com/")
1317
+ req.Header.Set("sec-ch-ua", "\"Chromium\";v=\"134\", \"Not:A-Brand\";v=\"24\", \"Google Chrome\";v=\"134\"")
1318
+ req.Header.Set("sec-ch-ua-mobile", "?0")
1319
+ req.Header.Set("sec-ch-ua-platform", "\"Windows\"")
1320
+ req.Header.Set("sec-fetch-dest", "empty")
1321
+ req.Header.Set("sec-fetch-mode", "cors")
1322
+ req.Header.Set("sec-fetch-site", "same-origin")
1323
+ req.Header.Set("staffname", "staryxzhang")
1324
+ req.Header.Set("wsid", "10697")
1325
+ req.Header.Set("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36")
1326
+ }
1327
+
1328
// generateRequestID returns a per-request identifier derived from the current
// wall-clock time in nanoseconds, rendered as lowercase hex.
// NOTE(review): not collision-proof for requests arriving within the clock's
// resolution — confirm this is acceptable for log correlation only.
func generateRequestID() string {
	nanos := time.Now().UnixNano()
	return fmt.Sprintf("%x", nanos)
}
1332
+
1333
// getRandomString returns a pseudo-random string of the given length drawn
// from lowercase letters and digits. It is used for OpenAI-style response IDs
// ("chatcmpl-..."), so cryptographic strength is not required.
//
// The previous implementation indexed the charset with time.Now().UnixNano()
// and slept one nanosecond per character; depending on clock resolution this
// produced long runs of identical characters plus needless latency. A small
// xorshift64 PRNG seeded once from the clock fixes both without new imports.
func getRandomString(length int) string {
	const charset = "abcdefghijklmnopqrstuvwxyz0123456789"
	b := make([]byte, length)
	state := uint64(time.Now().UnixNano()) | 1 // avoid the all-zero fixed point
	for i := range b {
		// One xorshift64 step (Marsaglia).
		state ^= state << 13
		state ^= state >> 7
		state ^= state << 17
		b[i] = charset[state%uint64(len(charset))]
	}
	return string(b)
}
1343
+
1344
+ // 格式化消息为字符串
1345
+ func formatMessages(messages []APIMessage) string {
1346
+ var result strings.Builder
1347
+ for _, msg := range messages {
1348
+ result.WriteString(msg.Role)
1349
+ result.WriteString(": ")
1350
+ result.WriteString(contentToString(msg.Content))
1351
+ result.WriteString("\n")
1352
+ }
1353
+ return result.String()
1354
+ }
1355
+
1356
// min returns the smaller of two ints.
// NOTE(review): Go 1.21+ ships a built-in min; this package-level definition
// shadows it and is kept for compatibility with existing callers.
func min(a, b int) int {
	if a > b {
		return b
	}
	return a
}
1363
+
1364
// max returns the larger of two int64 values.
// NOTE(review): Go 1.21+ ships a built-in max; this package-level definition
// shadows it and is kept for compatibility with existing callers.
func max(a, b int64) int64 {
	if a < b {
		return b
	}
	return a
}