Calmlo committed
Commit 5838dda · verified · 1 Parent(s): e706cfb

Update server.js

Files changed (1)
  1. server.js +171 -153
server.js CHANGED
@@ -118,7 +118,7 @@ app.get('/v1/models', (req, res) => {
 function convertMessagesToFalPrompt(messages) {
     let fixed_system_prompt_content = "";
     const conversation_message_blocks = [];
-    console.log(`Original messages count: ${messages.length}`);
+    // console.log(`Original messages count: ${messages.length}`); // Reduced logging verbosity

     // 1. Separate system messages and format user/assistant messages
     for (const message of messages) {
@@ -145,7 +145,6 @@ function convertMessagesToFalPrompt(messages) {
         fixed_system_prompt_content = fixed_system_prompt_content.substring(0, SYSTEM_PROMPT_LIMIT);
         console.warn(`Combined system messages truncated from ${originalLength} to ${SYSTEM_PROMPT_LIMIT}`);
     }
-    // Trim trailing whitespace so the later checks and concatenation stay clean
     fixed_system_prompt_content = fixed_system_prompt_content.trim();


@@ -155,7 +154,7 @@ function convertMessagesToFalPrompt(messages) {
         space_occupied_by_fixed_system = fixed_system_prompt_content.length + 4; // reserve room for the "\n\n...\n\n" wrapper
     }
     const remaining_system_limit = Math.max(0, SYSTEM_PROMPT_LIMIT - space_occupied_by_fixed_system);
-    console.log(`Trimmed fixed system prompt length: ${fixed_system_prompt_content.length}. Approx remaining system history limit: ${remaining_system_limit}`);
+    // console.log(`Trimmed fixed system prompt length: ${fixed_system_prompt_content.length}. Approx remaining system history limit: ${remaining_system_limit}`);


     // 4. Fill the user/assistant conversation history from newest to oldest
@@ -166,13 +165,13 @@ function convertMessagesToFalPrompt(messages) {
     let promptFull = false;
     let systemHistoryFull = (remaining_system_limit <= 0);

-    console.log(`Processing ${conversation_message_blocks.length} user/assistant messages for recency filling.`);
+    // console.log(`Processing ${conversation_message_blocks.length} user/assistant messages for recency filling.`);
     for (let i = conversation_message_blocks.length - 1; i >= 0; i--) {
         const message_block = conversation_message_blocks[i];
         const block_length = message_block.length;

         if (promptFull && systemHistoryFull) {
-            console.log(`Both prompt and system history slots full. Omitting older messages from index ${i}.`);
+            // console.log(`Both prompt and system history slots full. Omitting older messages from index ${i}.`);
             break;
         }

@@ -184,7 +183,7 @@ function convertMessagesToFalPrompt(messages) {
                 continue;
             } else {
                 promptFull = true;
-                console.log(`Prompt limit (${PROMPT_LIMIT}) reached. Trying system history slot.`);
+                // console.log(`Prompt limit (${PROMPT_LIMIT}) reached. Trying system history slot.`);
             }
         }

@@ -196,7 +195,7 @@ function convertMessagesToFalPrompt(messages) {
                 continue;
             } else {
                 systemHistoryFull = true;
-                console.log(`System history limit (${remaining_system_limit}) reached.`);
+                // console.log(`System history limit (${remaining_system_limit}) reached.`);
             }
         }
     }
@@ -204,46 +203,36 @@ function convertMessagesToFalPrompt(messages) {
     // 5. *** Assemble the final prompt and system_prompt (with separator logic) ***
     const system_prompt_history_content = system_prompt_history_blocks.join('').trim();
     const final_prompt = prompt_history_blocks.join('').trim();
-
-    // Define the separator
     const SEPARATOR = "\n\n-------下面是比较早之前的对话内容-----\n\n";
-
     let final_system_prompt = "";
-
-    // Check whether each part has content (using the trimmed fixed part)
     const hasFixedSystem = fixed_system_prompt_content.length > 0;
     const hasSystemHistory = system_prompt_history_content.length > 0;

     if (hasFixedSystem && hasSystemHistory) {
-        // Both parts present: join them with the separator
         final_system_prompt = fixed_system_prompt_content + SEPARATOR + system_prompt_history_content;
-        console.log("Combining fixed system prompt and history with separator.");
+        // console.log("Combining fixed system prompt and history with separator.");
     } else if (hasFixedSystem) {
-        // Only the fixed part
         final_system_prompt = fixed_system_prompt_content;
-        console.log("Using only fixed system prompt.");
+        // console.log("Using only fixed system prompt.");
    } else if (hasSystemHistory) {
-        // Only the history part (the fixed part is empty)
         final_system_prompt = system_prompt_history_content;
-        console.log("Using only history in system prompt slot.");
+        // console.log("Using only history in system prompt slot.");
     }
-    // If both parts are empty, final_system_prompt stays ""

-    // 6. Return the result
     const result = {
-        system_prompt: final_system_prompt, // no further trim needed on the final result
-        prompt: final_prompt // final_prompt was already trimmed before assembly
+        system_prompt: final_system_prompt,
+        prompt: final_prompt
     };

-    console.log(`Final system_prompt length (Sys+Separator+Hist): ${result.system_prompt.length}`);
-    console.log(`Final prompt length (Hist): ${result.prompt.length}`);
+    // console.log(`Final system_prompt length (Sys+Separator+Hist): ${result.system_prompt.length}`);
+    // console.log(`Final prompt length (Hist): ${result.prompt.length}`);

     return result;
 }
 // === End of convertMessagesToFalPrompt ===


-// POST /v1/chat/completions endpoint (with key retry logic)
+// POST /v1/chat/completions endpoint (with key retry logic - stream handling fix)
 app.post('/v1/chat/completions', async (req, res) => {
     const { model, messages, stream = false, reasoning = false, ...restOpenAIParams } = req.body;
     const requestId = `req-${Date.now()}`; // Unique ID for this incoming request
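
For orientation, here is a minimal usage sketch (not part of the commit) of what the converter hands to the fal-ai call. The exact "user/assistant block" formatting lives outside this hunk's context, so it is assumed here; only the returned { system_prompt, prompt } shape and the limit/separator behaviour are taken from the code above.

// Illustrative only.
const messages = [
    { role: 'system', content: 'You are a helpful assistant.' },
    { role: 'user', content: 'Hello!' },
    { role: 'assistant', content: 'Hi! How can I help?' },
    { role: 'user', content: 'Summarize our chat so far.' },
];

const { system_prompt, prompt } = convertMessagesToFalPrompt(messages);
// prompt:        the most recent user/assistant blocks that fit within PROMPT_LIMIT
// system_prompt: the fixed system text, with any older history appended after the
//                separator once the prompt slot is full (empty string if neither exists)
console.log(system_prompt.length, prompt.length);
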
@@ -261,6 +250,20 @@ app.post('/v1/chat/completions', async (req, res) => {
     let lastError = null; // Store the last error encountered during key rotation
     let success = false; // Flag to indicate if any key succeeded

+    // Prepare the Fal input (it only needs to be built once)
+    const { prompt, system_prompt } = convertMessagesToFalPrompt(messages);
+    const falInput = {
+        model: model,
+        prompt: prompt,
+        ...(system_prompt && { system_prompt: system_prompt }),
+        reasoning: !!reasoning,
+    };
+
+    // Log the Fal input and prompt info once
+    console.log(`[${requestId}] Fal Input (prepared once):`, JSON.stringify(falInput, null, 2));
+    console.log(`[${requestId}] System Prompt Length:`, system_prompt?.length || 0);
+    console.log(`[${requestId}] Prompt Length:`, prompt?.length || 0);
+
     // *** Retry loop: try at most falKeys.length keys ***
     for (let attempt = 0; attempt < falKeys.length; attempt++) {
         const keyIndexToTry = (currentFalKeyIndex + attempt) % falKeys.length;
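
The rotation arithmetic in that loop header is easy to misread, so here is a standalone sketch (not part of the commit) with made-up keys showing how each attempt offsets from the module-level index and wraps around; on the first confirmed success the handler then advances currentFalKeyIndex past the key that worked, spreading load across requests.

// Hypothetical values for illustration only.
const falKeys = ['key-a', 'key-b', 'key-c'];
let currentFalKeyIndex = 2; // e.g. left over from the previous request

for (let attempt = 0; attempt < falKeys.length; attempt++) {
    const keyIndexToTry = (currentFalKeyIndex + attempt) % falKeys.length;
    console.log(`attempt ${attempt} -> key index ${keyIndexToTry}`);
}
// attempt 0 -> key index 2
// attempt 1 -> key index 0
// attempt 2 -> key index 1
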
@@ -273,31 +276,6 @@ app.post('/v1/chat/completions', async (req, res) => {
                 credentials: selectedFalKey,
             });

-            // Prepare the Fal input (it only needs to be built once)
-            // Note: if convertMessagesToFalPrompt is expensive, it can be moved outside the loop
-            const { prompt, system_prompt } = convertMessagesToFalPrompt(messages);
-            const falInput = {
-                model: model,
-                prompt: prompt,
-                ...(system_prompt && { system_prompt: system_prompt }),
-                reasoning: !!reasoning,
-            };
-
-            // Log the Fal input and prompt info once
-            if (attempt === 0) {
-                console.log(`[${requestId}] Fal Input:`, JSON.stringify(falInput, null, 2));
-                console.log(`[${requestId}] Forwarding request to fal-ai with system-priority + separator + recency input:`);
-                console.log(`[${requestId}] System Prompt Length:`, system_prompt?.length || 0);
-                console.log(`[${requestId}] Prompt Length:`, prompt?.length || 0);
-                // Detailed content logging is commented out by default for brevity
-                // console.log(`[${requestId}] --- System Prompt Start ---`);
-                // console.log(system_prompt);
-                // console.log(`[${requestId}] --- System Prompt End ---`);
-                // console.log(`[${requestId}] --- Prompt Start ---`);
-                // console.log(prompt);
-                // console.log(`[${requestId}] --- Prompt End ---`);
-            }
-
             // --- Execute the Fal AI call ---
             if (stream) {
                 // --- Streaming ---
@@ -305,138 +283,183 @@ app.post('/v1/chat/completions', async (req, res) => {
                 res.setHeader('Cache-Control', 'no-cache');
                 res.setHeader('Connection', 'keep-alive');
                 res.setHeader('Access-Control-Allow-Origin', '*');
-                // Note: headers can only be sent once
+                // !! Do NOT flushHeaders here !!

                 let previousOutput = '';
-                const falStream = await fal.stream("fal-ai/any-llm", { input: falInput });
+                let firstEventProcessed = false;
+                let streamFailedMidway = false; // Flag for errors after successful start
+                let keyConfirmedWorking = false; // Flag if key actually produced data

-                // Mark success and set the starting key index for the next request
-                success = true;
-                currentFalKeyIndex = (keyIndexToTry + 1) % falKeys.length;
-                console.log(`[${requestId}] Key at index ${keyIndexToTry} successful. Next request starts at index ${currentFalKeyIndex}.`);
+                const falStream = await fal.stream("fal-ai/any-llm", { input: falInput });

                 // Process the stream
                 try {
-                    if (!res.headersSent) { // Make sure headers are only sent once
-                        res.flushHeaders();
-                        console.log(`[${requestId}] Stream headers flushed.`);
-                    }
                     for await (const event of falStream) {
-                        // ... (stream handling logic mostly unchanged; requestId added to logs) ...
                         const currentOutput = (event && typeof event.output === 'string') ? event.output : '';
                         const isPartial = (event && typeof event.partial === 'boolean') ? event.partial : true;
                         const errorInfo = (event && event.error) ? event.error : null;
+                        const eventStatus = errorInfo?.status; // Check status within error object if present

+                        // --- Check the event for errors ---
                         if (errorInfo) {
-                            console.error(`[${requestId}] Error received in fal stream event:`, errorInfo);
-                            const errorChunk = { id: `chatcmpl-${Date.now()}-error`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: {}, finish_reason: "error", message: { role: 'assistant', content: `Fal Stream Error: ${JSON.stringify(errorInfo)}` } }] };
-                            if (!res.writableEnded) {
-                                res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
+                            console.warn(`[${requestId}] Error received in fal stream event (Key Index ${keyIndexToTry}):`, errorInfo);
+                            lastError = errorInfo; // Store the error
+
+                            // If this is the first event and a key-related error (401/403/429), abort this key's attempt
+                            if (!firstEventProcessed && (eventStatus === 401 || eventStatus === 403 || eventStatus === 429)) {
+                                console.warn(`[${requestId}] Key-related error (${eventStatus}) on first stream event for key index ${keyIndexToTry}. Aborting this attempt.`);
+                                // No response needed; break out of the inner loop so the outer loop tries the next key
+                                break; // Exit the inner `for await...of` loop
+                            } else {
+                                // Any other error, or an error after the first event, counts as a stream failure
+                                console.error(`[${requestId}] Unrecoverable stream error or error after stream start.`);
+                                streamFailedMidway = true; // Mark stream as failed after start
+                                if (!res.headersSent) {
+                                    // Headers not sent yet: the key probably failed right away, so respond with a 500
+                                    res.status(500).json({ object: "error", message: `Fal Stream Error: ${JSON.stringify(errorInfo)}`, type:"fal_stream_error"});
+                                    console.error(`[${requestId}] Headers not sent, responding with 500 JSON error.`);
+                                } else if (!res.writableEnded) {
+                                    // Headers already sent: emit an error chunk
+                                    const errorChunk = { id: `chatcmpl-${Date.now()}-error`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: {}, finish_reason: "error", message: { role: 'assistant', content: `Fal Stream Error: ${JSON.stringify(errorInfo)}` } }] };
+                                    res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
+                                    console.error(`[${requestId}] Headers sent, sending error chunk.`);
+                                }
+                                break; // Exit the inner `for await...of` loop
                             }
-                            break; // Stop processing on error in stream event
                         }

-                        let deltaContent = '';
-                        if (currentOutput.startsWith(previousOutput)) {
-                            deltaContent = currentOutput.substring(previousOutput.length);
-                        } else if (currentOutput.length > 0) {
-                            console.warn(`[${requestId}] Fal stream output mismatch detected. Sending full current output as delta.`, { previousLength: previousOutput.length, currentLength: currentOutput.length });
-                            deltaContent = currentOutput;
-                            previousOutput = ''; // Reset previous if mismatch
+                        // --- First non-error event received successfully ---
+                        if (!keyConfirmedWorking && !errorInfo) {
+                            success = true; // Mark overall success *for this request*
+                            keyConfirmedWorking = true; // Mark this specific key as working
+                            currentFalKeyIndex = (keyIndexToTry + 1) % falKeys.length; // Update global index for next request
+                            console.log(`[${requestId}] Key at index ${keyIndexToTry} confirmed working. Next request starts at index ${currentFalKeyIndex}.`);
+                            if (!res.headersSent) {
+                                res.flushHeaders();
+                                console.log(`[${requestId}] Stream headers flushed.`);
+                            }
+                            firstEventProcessed = true;
                         }
-                        previousOutput = currentOutput;

-                        // Send chunk if there's content or if it's the final chunk (isPartial is false)
-                        if (deltaContent || !isPartial) {
-                            const openAIChunk = { id: `chatcmpl-${Date.now()}`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: { content: deltaContent }, finish_reason: isPartial === false ? "stop" : null }] };
-                            if (!res.writableEnded) {
-                                res.write(`data: ${JSON.stringify(openAIChunk)}\n\n`);
-                            }
+                        // --- Handle valid data ---
+                        if (!errorInfo) {
+                            let deltaContent = '';
+                            if (currentOutput.startsWith(previousOutput)) {
+                                deltaContent = currentOutput.substring(previousOutput.length);
+                            } else if (currentOutput.length > 0) {
+                                console.warn(`[${requestId}] Fal stream output mismatch detected. Sending full current output as delta.`, { previousLength: previousOutput.length, currentLength: currentOutput.length });
+                                deltaContent = currentOutput;
+                                previousOutput = ''; // Reset previous if mismatch
+                            }
+                            previousOutput = currentOutput;
+
+                            if (deltaContent || !isPartial) {
+                                const openAIChunk = { id: `chatcmpl-${Date.now()}`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: { content: deltaContent }, finish_reason: isPartial === false ? "stop" : null }] };
+                                if (!res.writableEnded) {
+                                    res.write(`data: ${JSON.stringify(openAIChunk)}\n\n`);
+                                }
+                            }
                         }
-                    }
-                    // After the loop, ensure the [DONE] signal is sent if the stream finished normally
-                    if (!res.writableEnded) {
-                        res.write(`data: [DONE]\n\n`);
-                        res.end();
-                        console.log(`[${requestId}] Stream finished and [DONE] sent.`);
-                    }
-                } catch (streamError) {
-                    console.error(`[${requestId}] Error during fal stream processing loop:`, streamError);
-                    lastError = streamError; // Store error from stream processing
-                    try {
-                        // Don't mark success=false here, the key worked but the stream itself failed.
-                        // The outer loop should break because the response has likely been ended.
-                        if (!res.writableEnded) { // Check if we can still write to the response
-                            const errorDetails = (streamError instanceof Error) ? streamError.message : JSON.stringify(streamError);
-                            res.write(`data: ${JSON.stringify({ error: { message: "Stream processing error", type: "proxy_error", details: errorDetails } })}\n\n`);
-                            res.write(`data: [DONE]\n\n`); // Send DONE even after error
+
+                    } // End `for await...of` loop
+
+                    // --- Post-loop handling ---
+                    if (streamFailedMidway) {
+                        // If we broke out due to a mid-stream error, make sure the response is ended
+                        if (!res.writableEnded) {
+                            res.write(`data: [DONE]\n\n`); // Send DONE even after error as per OpenAI spec
+                            res.end();
+                            console.log(`[${requestId}] Stream ended with [DONE] after mid-stream error.`);
+                        }
+                        break; // Exit the outer key retry loop because the stream failed *after* starting
+                    } else if (keyConfirmedWorking) {
+                        // If the key worked and the loop ended normally (no break)
+                        if (!res.writableEnded) {
+                            res.write(`data: [DONE]\n\n`);
                             res.end();
-                        } else {
-                            console.error(`[${requestId}] Stream already ended, cannot send error message.`);
+                            console.log(`[${requestId}] Stream finished normally and [DONE] sent.`);
+                        }
+                        break; // Exit the outer key retry loop because we succeeded
+                    }
+                    // If loop finished without confirming the key worked and without mid-stream error (e.g., first event was key error)
+                    // let the outer loop continue to the next key.
+
+                } catch (streamProcessingError) {
+                    // This catches errors in the stream processing *logic* itself, less likely
+                    console.error(`[${requestId}] Error during fal stream processing loop logic:`, streamProcessingError);
+                    lastError = streamProcessingError;
+                    if (!res.headersSent) {
+                        res.status(500).json({ object: "error", message: `Proxy Stream Processing Error: ${streamProcessingError.message}`, type:"proxy_internal_error"});
+                        console.error(`[${requestId}] Headers not sent, responding with 500 JSON error for stream logic failure.`);
+                    } else if (!res.writableEnded) {
+                        try {
+                            res.write(`data: ${JSON.stringify({ error: { message: "Proxy Stream processing error", type: "proxy_internal_error", details: streamProcessingError.message } })}\n\n`);
+                            res.write(`data: [DONE]\n\n`);
+                            res.end();
+                            console.error(`[${requestId}] Headers sent, sending error chunk for stream logic failure.`);
+                        } catch (finalError) {
+                            console.error(`[${requestId}] Error sending final error message to client:`, finalError);
+                            if (!res.writableEnded) { res.end(); }
                         }
-                    } catch (finalError) {
-                        console.error(`[${requestId}] Error sending stream error message to client:`, finalError);
-                        if (!res.writableEnded) { res.end(); }
                     }
-                    // Break the outer key retry loop as the stream failed mid-way
-                    break;
+                    break; // Exit the outer key retry loop
                 }
-                // If the stream completed successfully, exit the retry loop
-                break; // Exit the key retry loop
+
+                // If we reached here and `success` is true, it means the stream finished successfully.
+                if (success) {
+                    break; // Exit the outer key retry loop
+                }
+                // Otherwise, the stream ended because the first event was a key error, continue the outer loop.

             } else {
-                // --- Non-streaming ---
+                // --- Non-streaming (mostly unchanged) ---
                 console.log(`[${requestId}] Executing non-stream request with key index ${keyIndexToTry}...`);
                 const result = await fal.subscribe("fal-ai/any-llm", { input: falInput, logs: true });

-                // Check for business errors returned by Fal AI (e.g. invalid input); switching keys won't fix these
                 if (result && result.error) {
                     console.error(`[${requestId}] Fal-ai returned a business error with key index ${keyIndexToTry}:`, result.error);
-                    // Treat this as final; don't retry other keys
                     lastError = new Error(`Fal-ai error: ${JSON.stringify(result.error)}`);
-                    lastError.status = 500; // Or map from Fal error if possible, default 500
+                    lastError.status = result.status || 500; // Use status from error if available
                     lastError.type = "fal_ai_error";
-                    break; // Exit retry loop, no point trying other keys for bad input
+                    // Business errors (e.g., bad input) usually shouldn't be retried with other keys
+                    break; // Exit retry loop
                 }

                 console.log(`[${requestId}] Received non-stream result from fal-ai with key index ${keyIndexToTry}`);
-                // console.log("Full non-stream result:", JSON.stringify(result, null, 2)); // Uncomment for detailed logs
-
-                // Mark success and set the starting key index for the next request
-                success = true;
-                currentFalKeyIndex = (keyIndexToTry + 1) % falKeys.length;
-                console.log(`[${requestId}] Key at index ${keyIndexToTry} successful. Next request starts at index ${currentFalKeyIndex}.`);

+                success = true; // Mark overall success
+                currentFalKeyIndex = (keyIndexToTry + 1) % falKeys.length; // Update global index
+                console.log(`[${requestId}] Key at index ${keyIndexToTry} successful (non-stream). Next request starts at index ${currentFalKeyIndex}.`);

                 const openAIResponse = {
                     id: `chatcmpl-${result.requestId || Date.now()}`, object: "chat.completion", created: Math.floor(Date.now() / 1000), model: model,
                     choices: [{ index: 0, message: { role: "assistant", content: result.output || "" }, finish_reason: "stop" }],
-                    usage: { prompt_tokens: null, completion_tokens: null, total_tokens: null }, // Fal doesn't provide token usage
-                    system_fingerprint: null, // Fal doesn't provide system fingerprint
-                    ...(result.reasoning && { fal_reasoning: result.reasoning }), // Include reasoning if present
+                    usage: { prompt_tokens: null, completion_tokens: null, total_tokens: null },
+                    system_fingerprint: null,
+                    ...(result.reasoning && { fal_reasoning: result.reasoning }),
                 };
                 res.json(openAIResponse);
-                console.log(`[${requestId}] Returned non-stream response.`);
                 break; // Success, exit the retry loop
             }

         } catch (error) {
-            lastError = error; // Store the error from this attempt
-            const status = error?.status; // Fal client errors should have status
-            const errorMessage = error?.body?.detail || error?.message || 'Unknown error'; // Get detailed message if possible
+            // This outer catch handles errors from fal.config, fal.stream setup (before first event), fal.subscribe setup
+            lastError = error;
+            const status = error?.status;
+            const errorMessage = error?.body?.detail || error?.message || 'Unknown setup error';

-            console.warn(`[${requestId}] Attempt ${attempt + 1} with key index ${keyIndexToTry} failed. Status: ${status || 'N/A'}, Message: ${errorMessage}`);
+            console.warn(`[${requestId}] Attempt ${attempt + 1} with key index ${keyIndexToTry} failed during setup. Status: ${status || 'N/A'}, Message: ${errorMessage}`);
+            console.error("Setup Error details:", error); // Log full error

-            // Check whether this is a key-related error (401 Unauthorized, 403 Forbidden, 429 Rate Limit)
+            // Check for key-related errors during setup
             if (status === 401 || status === 403 || status === 429) {
-                console.log(`[${requestId}] Key-related error (${status}). Trying next key...`);
-                // Keep looping to try the next key
+                console.log(`[${requestId}] Key-related setup error (${status}). Trying next key...`);
+                // Continue the outer loop
             } else {
-                // Any other kind of error (network issues, Fal 5xx server errors, 400 bad request, etc.)
-                // Retrying with other keys is usually pointless, so stop retrying
-                console.error(`[${requestId}] Unrecoverable error encountered. Status: ${status || 'N/A'}. Stopping key rotation for this request.`);
-                console.error("Error details:", error); // Log the full error object for debugging
-                break; // Exit the retry loop
+                // Unrecoverable setup error (e.g., network, internal fal error)
+                console.error(`[${requestId}] Unrecoverable setup error encountered. Status: ${status || 'N/A'}. Stopping key rotation.`);
+                break; // Exit the outer key retry loop
             }
         }
     } // --- End of retry loop ---
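
Fal stream events carry the cumulative output so far, and the handler above emits only the new suffix as an OpenAI-style chunk. A self-contained sketch of that delta logic follows; the event payloads are invented for illustration and this is not part of the commit.

// Mirrors the delta computation inside the stream loop above.
function computeDelta(previousOutput, currentOutput) {
    if (currentOutput.startsWith(previousOutput)) {
        return currentOutput.substring(previousOutput.length);
    }
    // Mismatch: fall back to treating the whole current output as the delta.
    return currentOutput;
}

let previous = '';
for (const current of ['Hel', 'Hello', 'Hello, world!']) {
    const delta = computeDelta(previous, current);
    previous = current;
    process.stdout.write(`data: ${JSON.stringify({ choices: [{ index: 0, delta: { content: delta } }] })}\n\n`);
}
// Emits deltas "Hel", "lo", ", world!" as SSE-style lines.
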
@@ -445,36 +468,31 @@ app.post('/v1/chat/completions', async (req, res) => {
     if (!success) {
         console.error(`[${requestId}] All Fal Key attempts failed or an unrecoverable error occurred.`);
         if (!res.headersSent) {
-            const statusCode = lastError?.status || 503; // Use status from last error if available, default to 503 Service Unavailable
+            const statusCode = lastError?.status || 503; // Use status from last error (could be from setup or first stream event), default 503
             const errorMessage = (lastError instanceof Error) ? lastError.message : JSON.stringify(lastError);
-            // Try to extract a more specific message if available
-            const detailMessage = lastError?.body?.detail || errorMessage;
+            const detailMessage = lastError?.body?.detail || errorMessage; // Prefer detailed message
             const errorType = lastError?.type || (statusCode === 401 || statusCode === 403 || statusCode === 429 ? "key_error" : "proxy_error");

             console.error(`[${requestId}] Sending final error response. Status: ${statusCode}, Type: ${errorType}, Message: ${detailMessage}`);

-            // Return a standard OpenAI-style error format
             res.status(statusCode).json({
                 object: "error",
                 message: `All Fal Key attempts failed or an unrecoverable error occurred. Last error: ${detailMessage}`,
                 type: errorType,
                 param: null,
-                code: statusCode === 429 ? "rate_limit_exceeded" : (statusCode === 401 || statusCode === 403 ? "invalid_api_key" : null)
+                code: statusCode === 429 ? "rate_limit_exceeded" : (statusCode === 401 || statusCode === 403 ? "invalid_api_key" : "service_unavailable")
             });
         } else if (!res.writableEnded) {
-            console.error(`[${requestId}] Headers already sent, but request failed after stream started or during processing. Ending response with error chunk.`);
-            // Try to send the error over the stream if possible
-            try {
-                const errorDetails = (lastError instanceof Error) ? lastError.message : JSON.stringify(lastError);
-                const detailMessage = lastError?.body?.detail || errorDetails;
-                const errorChunk = { id: `chatcmpl-${Date.now()}-final-error`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: model, choices: [{ index: 0, delta: {}, finish_reason: "error", message: { role: 'assistant', content: `Proxy Error: All key attempts failed or stream processing error. Last error: ${detailMessage}` } }] };
-                res.write(`data: ${JSON.stringify(errorChunk)}\n\n`);
-                res.write(`data: [DONE]\n\n`);
-                res.end();
-            } catch (e) {
-                console.error(`[${requestId}] Failed to write final error to stream:`, e);
-                if (!res.writableEnded) res.end(); // Force end if possible
-            }
+            // This case should be less likely now as stream errors are handled inside the loop
+            console.error(`[${requestId}] Headers potentially sent, but request failed. Attempting to end stream.`);
+            try {
+                // Don't send another error chunk if one might have been sent already
+                res.write(`data: [DONE]\n\n`);
+                res.end();
+            } catch (e) {
+                console.error(`[${requestId}] Failed to write final [DONE] to stream:`, e);
+                if (!res.writableEnded) res.end();
+            }
         } else {
             console.error(`[${requestId}] Request failed, but response stream was already fully ended. Cannot send error.`);
         }
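
The `code` field in that final JSON error maps the HTTP-style status onto an OpenAI-ish error code. Restated as a small helper for clarity (illustrative only, not part of the commit):

// Same mapping as the ternary in the response body above.
function openAIErrorCode(statusCode) {
    if (statusCode === 429) return 'rate_limit_exceeded';
    if (statusCode === 401 || statusCode === 403) return 'invalid_api_key';
    return 'service_unavailable';
}

console.log(openAIErrorCode(429)); // 'rate_limit_exceeded'
console.log(openAIErrorCode(403)); // 'invalid_api_key'
console.log(openAIErrorCode(500)); // 'service_unavailable'
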
@@ -485,7 +503,7 @@ app.post('/v1/chat/completions', async (req, res) => {
 // Start the server (updated startup info)
 app.listen(PORT, () => {
     console.log(`===================================================`);
-    console.log(` Fal OpenAI Proxy Server (Key Rotation with Retry + System Top + Separator + Recency)`); // updated description
+    console.log(` Fal OpenAI Proxy Server (Key Rotation with Retry v2 + System Top + Separator + Recency)`); // updated description
     console.log(` Listening on port: ${PORT}`);
     console.log(` Loaded ${falKeys.length} Fal AI Keys for rotation.`);
     console.log(` Using Limits: System Prompt=${SYSTEM_PROMPT_LIMIT}, Prompt=${PROMPT_LIMIT}`);
@@ -497,5 +515,5 @@ app.listen(PORT, () => {

 // Root path response (updated info)
 app.get('/', (req, res) => {
-    res.send('Fal OpenAI Proxy (Key Rotation with Retry + System Top + Separator + Recency Strategy) is running.'); // updated description
+    res.send('Fal OpenAI Proxy (Key Rotation with Retry v2 + System Top + Separator + Recency Strategy) is running.'); // updated description
 });
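
To exercise the proxy after this change, a plain fetch against its OpenAI-compatible endpoint is enough. The port and model name below are placeholders, and Node 18+ is assumed for global fetch; this snippet is a smoke-test sketch, not part of the commit.

// Adjust the port to your PORT setting and the model to one supported by fal-ai/any-llm.
(async () => {
    const resp = await fetch('http://localhost:3000/v1/chat/completions', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
            model: 'anthropic/claude-3.5-sonnet',
            messages: [{ role: 'user', content: 'Say hello.' }],
            stream: false,
        }),
    });
    console.log(await resp.json()); // OpenAI-style chat.completion object
})();
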
 