Ge-AI commited on
Commit
e7fae93
·
verified ·
1 Parent(s): fb55624

Update openai_ondemand_adapter.py

Browse files
Files changed (1) hide show
  1. openai_ondemand_adapter.py +225 -223
openai_ondemand_adapter.py CHANGED
@@ -7,7 +7,8 @@ import threading
7
  import logging
8
  import os
9
 
10
- CLAUDE_SYSTEM_PROMOT = """The assistant is Claude, created by Anthropic.
 
11
 
12
  Claude enjoys helping humans and sees its role as an intelligent and kind assistant to the people, with depth and wisdom that makes it more than a mere tool.
13
 
@@ -267,18 +268,16 @@ def format_openai_sse_delta(chunk_data_dict):
267
  """将数据块格式化为 OpenAI SSE (Server-Sent Events) 流格式"""
268
  return f"data: {json.dumps(chunk_data_dict, ensure_ascii=False)}\n\n"
269
 
270
- # --- chat_completions 辅助函数,用于流式请求的单个尝试 ---
271
- # 这个函数在 chat_completions 内部定义,或者在它可以访问 ONDEMAND_API_BASE, format_openai_sse_delta 等全局/闭包变量的地方定义
272
- def _execute_one_stream_attempt(apikey, session_id, query_str, endpoint_id, openai_model_name_for_response, attempt_num_logging):
273
  """
274
  执行一次流式请求尝试。
275
  返回: (generated_sse_strings, accumulated_text_content, api_error_occurred)
276
  generated_sse_strings: 此尝试生成的所有SSE事件字符串列表。
277
  accumulated_text_content: 从流中累积的纯文本内容。
278
- api_error_occurred: 布尔值,指示此尝试是否遇到可恢复的API错误(例如,非200状态码但被处理为SSE错误事件
279
- 注意:网络超时等 requests.RequestException 会被直接抛出。
280
  """
281
- url = f"{ONDEMAND_API_BASE}/sessions/{session_id}/query"
282
  payload = {
283
  "query": query_str,
284
  "endpointId": endpoint_id,
@@ -286,26 +285,26 @@ def _execute_one_stream_attempt(apikey, session_id, query_str, endpoint_id, open
286
  "responseMode": "stream"
287
  }
288
  headers = {
289
- "apikey": apikey,
290
  "Content-Type": "application/json",
291
  "Accept": "text/event-stream"
292
  }
293
 
294
  generated_sse_strings = []
295
  accumulated_text_parts = []
296
- api_error_handled_as_sse = False # 标记是否已将API错误转换为SSE事件
297
 
298
- logging.info(f"【流式请求子尝试 {attempt_num_logging}】发送到 OnDemand: Session={session_id}, Endpoint={endpoint_id}, Key={keymgr.display_key(apikey)}")
299
 
300
  try:
301
  with requests.post(url, json=payload, headers=headers, stream=True, timeout=180) as resp:
302
  if resp.status_code != 200:
303
  api_error_handled_as_sse = True
304
  error_text = resp.text
305
- logging.error(f"【OnDemand流错误】请求失败 (子尝试 {attempt_num_logging})。状态码: {resp.status_code}, Session: {session_id}, 响应: {error_text[:500]}")
306
  error_payload = {
307
  "error": {
308
- "message": f"OnDemand API Error (Stream Init, Attempt {attempt_num_logging}): {resp.status_code} - {error_text[:200]}",
309
  "type": "on_demand_api_error",
310
  "code": resp.status_code
311
  }
@@ -315,31 +314,31 @@ def _execute_one_stream_attempt(apikey, session_id, query_str, endpoint_id, open
315
  return generated_sse_strings, "".join(accumulated_text_parts), api_error_handled_as_sse
316
 
317
  first_chunk_sent = False
318
- last_line_str = "" # 用于检查流是否以[DONE]结束
319
  for line_bytes in resp.iter_lines():
320
  if not line_bytes:
321
  continue
322
 
323
  line_str = line_bytes.decode("utf-8")
324
- last_line_str = line_str # 跟踪最后一行,以防流意外终止
325
 
326
  if line_str.startswith("data:"):
327
  data_part = line_str[len("data:"):].strip()
328
 
329
  if data_part == "[DONE]":
330
- logging.info(f"【OnDemand流】接收到 [DONE] 信号 (子尝试 {attempt_num_logging})。Session: {session_id}")
331
  generated_sse_strings.append("data: [DONE]\n\n")
332
  break
333
  elif data_part.startswith("[ERROR]:"):
334
- api_error_handled_as_sse = True # OnDemand流内错误
335
  error_json_str = data_part[len("[ERROR]:"):].strip()
336
- logging.warning(f"【OnDemand流】接收到错误事件 (子尝试 {attempt_num_logging}): {error_json_str}。Session: {session_id}")
337
  try:
338
  error_obj = json.loads(error_json_str)
339
  except json.JSONDecodeError:
340
  error_obj = {"message": error_json_str, "type": "on_demand_stream_error_format"}
341
  generated_sse_strings.append(format_openai_sse_delta({"error": error_obj}))
342
- generated_sse_strings.append("data: [DONE]\n\n") # 错误后也发送DONE
343
  break
344
  else:
345
  try:
@@ -358,7 +357,6 @@ def _execute_one_stream_attempt(apikey, session_id, query_str, endpoint_id, open
358
  choice_delta["content"] = delta_content
359
 
360
  if not choice_delta.get("content") and not choice_delta.get("role"):
361
- # 避免发送完全空的 delta 对象,除非它是第一个角色块
362
  if not (choice_delta.get("role") and not choice_delta.get("content")):
363
  continue
364
 
@@ -367,45 +365,30 @@ def _execute_one_stream_attempt(apikey, session_id, query_str, endpoint_id, open
367
  "object": "chat.completion.chunk",
368
  "created": int(time.time()),
369
  "model": openai_model_name_for_response,
370
- "choices": [{
371
- "delta": choice_delta,
372
- "index": 0,
373
- "finish_reason": None
374
- }]
375
  }
376
  generated_sse_strings.append(format_openai_sse_delta(openai_chunk))
377
  except json.JSONDecodeError:
378
- logging.warning(f"【OnDemand流】无法解析JSON数据块 (子尝试 {attempt_num_logging}): {data_part[:100]}... Session: {session_id}")
379
- # 可以选择忽略,或者也作为一种错误事件发送
380
- # generated_sse_strings.append(f"event: warning\ndata: Malformed JSON in stream: {data_part[:100]}\n\n")
381
  continue
382
 
383
- # 如果循环正常结束但最后一行不是 [DONE] 且没有API错误,补充一个 [DONE]
384
  if not last_line_str.startswith("data: [DONE]") and not api_error_handled_as_sse:
385
- logging.info(f"【OnDemand流】(子尝试 {attempt_num_logging}) 流迭代完成,补充发送 [DONE]。Session: {session_id}")
386
  generated_sse_strings.append("data: [DONE]\n\n")
387
 
388
- except requests.exceptions.RequestException as e:
389
- # 网络/请求级别错误,应由更上层的重试逻辑(如 with_valid_key_and_session)处理
390
- logging.error(f"【OnDemand流】请求过程中发生网络或请求异常 (子尝试 {attempt_num_logging}): {e}, Session: {session_id}", exc_info=False) # exc_info=False for brevity
391
- raise # 重要:重新抛出,让调用者处理API Key/网络层面的重试
392
- except Exception as e:
393
- # 此处捕获在流处理中发生的其他意外Python错误
394
- api_error_handled_as_sse = True # 将其视为一种API错误,以便返回错误信息给客户端
395
- logging.error(f"【OnDemand流】处理流时发生未知错误 (子尝试 {attempt_num_logging}): {e}, Session: {session_id}", exc_info=True)
396
  error_payload = {
397
- "error": {
398
- "message": f"Unknown error during streaming (Attempt {attempt_num_logging}): {str(e)}",
399
- "type": "unknown_streaming_error_in_attempt"
400
- }
401
  }
402
  generated_sse_strings.append(format_openai_sse_delta(error_payload))
403
  generated_sse_strings.append("data: [DONE]\n\n")
404
- # 不重新抛出,因为我们已经格式化了错误信息以便通过SSE发送
405
 
406
  return generated_sse_strings, "".join(accumulated_text_parts).strip(), api_error_handled_as_sse
407
 
408
-
409
  @app.route("/v1/chat/completions", methods=["POST"])
410
  def chat_completions():
411
  """处理聊天补全请求,模拟 OpenAI /v1/chat/completions 接口"""
@@ -422,166 +405,184 @@ def chat_completions():
422
  if not isinstance(messages, list) or not messages:
423
  return jsonify({"error": "'messages' must be a non-empty list."}), 400
424
 
425
- openai_model_name = request_data.get("model", "gpt-4o") # 默认为 gpt-4o
426
  target_endpoint_id = get_endpoint_id(openai_model_name)
427
  is_stream_request = bool(request_data.get("stream", False))
428
 
429
- # --- 构造发送给 OnDemand 的 query 字符串 ---
430
  formatted_query_parts = []
431
  for msg in messages:
432
  role = msg.get("role", "user").strip().capitalize()
433
- content = msg.get("content", "") # content可以是字符串或列表(例如包含图片时)
434
  content_string = ""
435
- if isinstance(content, list): # 处理OpenAI content为列表的情况 (通常用于多模态)
436
- # OnDemand的query字段可能只接受文本。这里简单拼接文本部分。
437
- # 您可能需要根据OnDemand API如何处理多模态输入来调整此逻辑。
438
  temp_parts = []
439
  for item in content:
440
  if isinstance(item, dict) and item.get("type") == "text":
441
  temp_parts.append(item.get("text", ""))
442
- # elif isinstance(item, dict) and item.get("type") == "image_url":
443
- # temp_parts.append("[Image Content Not Transmitted To Text-Only OnDemand Query]") # 示例
444
- # 按照用户原始代码逻辑处理 list content
445
- elif isinstance(item, dict): # 用户原始逻辑
446
- for k, v_item in item.items(): # 修改变量名 v -> v_item 避免与外层冲突
447
- content_string += f"{k}: {v_item}\n{k}: {v_item}" # 用户原始逻辑
448
- if not content_string and temp_parts: # 如果原始逻辑未产生字符串,但有文本部分
449
  content_string = "\n".join(filter(None, temp_parts))
450
-
451
  elif isinstance(content, str):
452
  content_string = content
453
 
454
  content_string = content_string.strip()
455
- if not content_string: # 跳过空内容的消息
456
  continue
457
- formatted_query_parts.append(f"<|{role}|>: {content_string}") # 使用用户指定的格式
458
 
459
  if not formatted_query_parts:
460
  return jsonify({"error": "No valid content found in 'messages'."}), 400
461
 
462
- start_prompt = CLAUDE_SYSTEM_PROMOT + "\n\n" + """下面是对话历史. 你是Assitant角色,请遵从User指令,并用中文尽可能详细的回复。注意,请直接回复! 请不要在开头提出"根据上下文及历史记录"相关的话语。\n"""
 
463
  final_query_to_ondemand = start_prompt + "\n".join(formatted_query_parts)
464
 
465
- # --- 结束构造 query ---
466
-
467
- # 内部函数,用于封装实际的API调用逻辑,方便重试和密钥管理
468
- def attempt_ondemand_request(current_apikey, current_session_id):
469
  if is_stream_request:
470
- return handle_stream_request(current_apikey, current_session_id, final_query_to_ondemand, target_endpoint_id, openai_model_name)
471
  else:
472
- return handle_non_stream_request(current_apikey, current_session_id, final_query_to_ondemand, target_endpoint_id, openai_model_name)
473
 
474
- # 装饰器/高阶函数,用于管理API密钥获取、会话创建和重试逻辑
475
- def with_valid_key_and_session(action_func):
476
- max_retries = len(ONDEMAND_APIKEYS) * 2 if ONDEMAND_APIKEYS else 1
477
- retries_count = 0
478
- last_exception_seen = None
479
 
480
- while retries_count < max_retries:
481
- selected_apikey = None
 
482
  try:
483
- selected_apikey = keymgr.get()
484
- logging.info(f"【请求处理】使用 API Key: {keymgr.display_key(selected_apikey)},准备创建新会话...")
485
- ondemand_session_id = create_session(selected_apikey)
486
- return action_func(selected_apikey, ondemand_session_id)
487
-
488
- except ValueError as ve:
489
- logging.critical(f"【请求处理】KeyManager 错误: {ve}")
490
- last_exception_seen = ve
491
- break
492
- except requests.HTTPError as http_err:
493
- last_exception_seen = http_err
494
- response = http_err.response
495
- logging.warning(f"【请求处理】HTTP 错误发生。状态码: {response.status_code if response else 'N/A'}, Key: {keymgr.display_key(selected_apikey) if selected_apikey else 'N/A'}")
496
- if selected_apikey and response is not None:
497
- if response.status_code in (401, 403, 429):
498
- keymgr.mark_bad(selected_apikey)
499
- retries_count += 1
500
- logging.info(f"【请求处理】尝试次数: {retries_count}/{max_retries}. 等待片刻后重试...")
 
 
 
 
 
 
501
  time.sleep(1)
502
- continue
503
- except requests.exceptions.Timeout as timeout_err: # 更明确地捕获 Timeout
504
- last_exception_seen = timeout_err # timeout_err 而不是字符串
505
- logging.warning(f"【请求处理】请求超时。Key: {keymgr.display_key(selected_apikey) if selected_apikey else 'N/A'}, Error: {timeout_err}")
506
- if selected_apikey:
507
- keymgr.mark_bad(selected_apikey)
508
- retries_count += 1
509
- logging.info(f"【请求处理】尝试次数: {retries_count}/{max_retries}. 等待片刻后重试...")
510
- time.sleep(1)
511
- continue
512
- except requests.exceptions.RequestException as req_ex: # 其他网络相关错误
513
- last_exception_seen = req_ex
514
- logging.warning(f"【请求处理】网络请求错误。Key: {keymgr.display_key(selected_apikey) if selected_apikey else 'N/A'}, Error: {req_ex}")
515
- if selected_apikey: # 对于一般网络错误,也可能标记key
516
- keymgr.mark_bad(selected_apikey)
517
- retries_count += 1
518
- logging.info(f"【请求处理】尝试次数: {retries_count}/{max_retries}. 等待片刻后重试...")
519
- time.sleep(1)
520
- continue
521
- except Exception as e:
522
- last_exception_seen = e
523
- logging.error(f"【请求处理】发生意外的严重错误: {e}", exc_info=True)
524
- if selected_apikey:
525
- keymgr.mark_bad(selected_apikey)
526
- retries_count += 1
527
- # break # 对于非常严重的未知错误,可以选择直接中断
528
 
529
- error_message = "All attempts to process the request failed after multiple retries."
530
- if last_exception_seen:
531
- error_message += f" Last known error: {str(last_exception_seen)}"
532
  logging.error(error_message)
533
- return jsonify({"error": "Failed to process request with OnDemand service after multiple retries. Please check service status or API keys."}), 503
534
 
535
- return with_valid_key_and_session(attempt_ondemand_request)
536
 
537
 
538
- def handle_stream_request(apikey, session_id, query_str, endpoint_id, openai_model_name_for_response):
539
- """处理流式聊天补全请求,包含空回复重试逻辑"""
540
  max_empty_response_retries = 5
541
- attempt_count = 0
 
 
 
542
 
543
  final_sse_strings_to_yield = []
544
 
545
- while attempt_count < max_empty_response_retries:
546
- attempt_count += 1
547
 
548
- # _execute_one_stream_attempt 可能会抛出 requests.RequestException (如超时、连接错误)
549
- # 这些异常会由 with_valid_key_and_session 捕获并处理 (可能更换key重试)
550
- sse_strings_this_attempt, accumulated_text_this_attempt, api_error_in_attempt = \
551
- _execute_one_stream_attempt(apikey, session_id, query_str, endpoint_id, openai_model_name_for_response, attempt_count)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
552
 
553
- final_sse_strings_to_yield = sse_strings_this_attempt # 保存当前尝试的结果,无论好坏
 
554
 
555
- if api_error_in_attempt:
556
- logging.warning(f"【流式请求】尝试 {attempt_count} OnDemand 服务返回错误或处理内部错误,将返回此错误信息给客户端。")
557
- break # 退出空回复重试循环,直接使用包含错误信息的 final_sse_strings_to_yield
558
 
559
- if accumulated_text_this_attempt:
560
- logging.info(f"【流式请求】尝试 {attempt_count} 成功获取非空内容。")
561
- break # 成功获取内容,退出空回复重试循环
562
-
563
- # 到这里说明内容为空,且没有API错误
564
- logging.warning(f"【流式请求】尝试 {attempt_count} 返回空内容。总共尝试次数 {max_empty_response_retries}。")
565
- if attempt_count >= max_empty_response_retries:
566
- logging.error(f"【流式请求】达到最大空回复重试次数 ({max_empty_response_retries})。将返回空回复错误。")
567
- # 构造一个表示空回复错误的SSE事件
568
- empty_error_payload = {
569
- "error": {
570
- "message": f"Model returned an empty stream after {max_empty_response_retries} retries.",
571
- "type": "empty_stream_error_after_retries",
572
- "code": "empty_response"
573
- }
574
- }
575
- final_sse_strings_to_yield = [format_openai_sse_delta(empty_error_payload), "data: [DONE]\n\n"]
576
- break # 退出循环,使用这个错误信息
577
-
578
- logging.info(f"【流式请求】空回复,将在1秒后重试。当前尝试 {attempt_count}/{max_empty_response_retries}")
579
- time.sleep(1) # 等待1秒再进行下一次空回复重试
580
 
581
- # 定义最终的生成器,用于Response对象
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
582
  def final_generator_for_response():
583
- if not final_sse_strings_to_yield: # 以防万一 final_sse_strings_to_yield 为空
584
- logging.error("【流式请求】final_sse_strings_to_yield 为空,这不应该发生。返回通用错误。")
585
  yield format_openai_sse_delta({"error": {"message": "Unexpected empty result in streaming.", "type": "internal_proxy_error"}})
586
  yield "data: [DONE]\n\n"
587
  else:
@@ -590,92 +591,92 @@ def handle_stream_request(apikey, session_id, query_str, endpoint_id, openai_mod
590
 
591
  return Response(final_generator_for_response(), content_type='text/event-stream')
592
 
593
-
594
- def handle_non_stream_request(apikey, session_id, query_str, endpoint_id, openai_model_name_for_response):
595
- """处理非流式聊天补全请求,包含空回复重试逻辑"""
596
- url = f"{ONDEMAND_API_BASE}/sessions/{session_id}/query"
597
- payload = {
598
- "query": query_str,
599
- "endpointId": endpoint_id,
600
- "pluginIds": [],
601
- "responseMode": "sync"
602
- }
603
- headers = {"apikey": apikey, "Content-Type": "application/json"}
604
-
605
  max_empty_response_retries = 5
606
- empty_response_retry_count = 0
 
 
 
 
 
607
 
608
- while empty_response_retry_count < max_empty_response_retries:
609
- empty_response_retry_count += 1
610
- logging.info(f"【同步请求】尝试 #{empty_response_retry_count}/{max_empty_response_retries}. Session={session_id}, Endpoint={endpoint_id}, Key={keymgr.display_key(apikey)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
611
 
612
  try:
613
  resp = requests.post(url, json=payload, headers=headers, timeout=120)
614
- resp.raise_for_status() # 检查HTTP错误 (4xx, 5xx) - 这会被 with_valid_key_and_session 捕获
615
 
616
  response_json = resp.json()
617
  if "data" not in response_json or "answer" not in response_json["data"]:
618
- logging.error(f"【OnDemand同步错误】响应格式不符合预期 (尝试 {empty_response_retry_count})。Session: {session_id}, 响应: {str(response_json)[:500]}")
619
- # 这种格式错误不计为空回复重试,而是视为API行为异常,可能需要上层重试或失败
620
- # 为了简单起见,如果上层 with_valid_key_and_session 不处理这种 ValueError,这里我们直接返回错误
621
- # 或者可以抛出自定义异常让上层处理
622
  raise ValueError("OnDemand API sync response missing 'data.answer' field.")
623
 
624
  ai_response_content = response_json["data"]["answer"]
625
- if ai_response_content is None:
626
- ai_response_content = ""
627
 
628
- if ai_response_content.strip(): # 如果内容非空
629
- logging.info(f"【同步请求】尝试 {empty_response_retry_count} 成功获取非空内容。")
630
  openai_response_obj = {
631
- "id": "chatcmpl-" + str(uuid.uuid4())[:12],
632
- "object": "chat.completion",
633
- "created": int(time.time()),
634
  "model": openai_model_name_for_response,
635
- "choices": [{
636
- "index": 0,
637
- "message": {"role": "assistant", "content": ai_response_content},
638
- "finish_reason": "stop"
639
- }],
640
  "usage": {}
641
  }
642
  return jsonify(openai_response_obj)
643
- else: # 内容为空
644
- logging.warning(f"【同步请求】尝试 {empty_response_retry_count} 返回空回复。Session: {session_id}")
645
- if empty_response_retry_count >= max_empty_response_retries:
646
- logging.error(f"【同步请求】达到最大空回复重试次数 ({max_empty_response_retries})。将返回空回复错误。")
647
- # 返回一个表示错误的JSON响应
648
  return jsonify({
649
  "error": f"Model returned an empty response after {max_empty_response_retries} retries.",
650
- "id": "chatcmpl-" + str(uuid.uuid4())[:12],
651
- "object": "chat.completion", # 保持对象类型一致
652
- "created": int(time.time()),
653
  "model": openai_model_name_for_response,
654
- "choices": [{
655
- "index": 0,
656
- "message": {"role": "assistant", "content": ""}, # 空内容
657
- "finish_reason": "length" # 或 "stop", 或自定义 "empty_response"
658
- }],
659
  "usage": {}
660
- }), 500 # 使用 500 Internal Server Error 或 503 Service Unavailable
661
-
662
- logging.info(f"【同步请求】空回复,将在1秒后重试。当前尝试 {empty_response_retry_count}/{max_empty_response_retries}")
663
- time.sleep(1) # 等待1秒再进行下一次空回复重试
664
-
665
- except requests.exceptions.RequestException as e:
666
- # 网络/请求级别错误 (包括 resp.raise_for_status() 引发的 HTTPError)
667
- # 这些应由 with_valid_key_and_session 处理 (例如更换API Key重试)
668
- logging.warning(f"【同步请求】(尝试 {empty_response_retry_count}) 发生请求级错误: {e}. 将由上层处理重试。")
669
- raise # 重新抛出,让 with_valid_key_and_session 处理
670
- except (ValueError, KeyError, json.JSONDecodeError) as e:
671
- # 解析响应或响应结构错误
672
- logging.error(f"【同步请求】(尝试 {empty_response_retry_count}) 处理响应时出错: {e}", exc_info=True)
673
- # 这种错误通常不应通过简单的空回复重试解决,可能表明API响应格式已更改或存在问题
674
- # 直接返回错误给客户端,或者抛给上层
675
- return jsonify({"error": f"Error processing OnDemand sync response: {str(e)}"}), 502 # Bad Gateway
 
676
 
677
- # 如果循环结束仍未成功(理论上应该在循环内返回或抛出异常)
678
- logging.error(f"【同步请求】意外退出空回复重试循环。这不应该发生。")
679
  return jsonify({"error": "Unexpected error in non-stream handling after empty response retries."}), 500
680
 
681
 
@@ -715,6 +716,7 @@ def health_check():
715
 
716
  if __name__ == "__main__":
717
  log_format = '[%(asctime)s] %(levelname)s in %(module)s (%(funcName)s): %(message)s'
 
718
  logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO").upper(), format=log_format)
719
 
720
  if not PRIVATE_KEY:
 
7
  import logging
8
  import os
9
 
10
+ # Claude System Prompt (as provided by user)
11
+ CLAUDE_SYSTEM_PROMPT = """The assistant is Claude, created by Anthropic.
12
 
13
  Claude enjoys helping humans and sees its role as an intelligent and kind assistant to the people, with depth and wisdom that makes it more than a mere tool.
14
 
 
268
  """将数据块格式化为 OpenAI SSE (Server-Sent Events) 流格式"""
269
  return f"data: {json.dumps(chunk_data_dict, ensure_ascii=False)}\n\n"
270
 
271
+ def _execute_one_stream_attempt(apikey_for_attempt, session_id_for_attempt, query_str, endpoint_id, openai_model_name_for_response, current_attempt_num_logging):
 
 
272
  """
273
  执行一次流式请求尝试。
274
  返回: (generated_sse_strings, accumulated_text_content, api_error_occurred)
275
  generated_sse_strings: 此尝试生成的所有SSE事件字符串列表。
276
  accumulated_text_content: 从流中累积的纯文本内容。
277
+ api_error_occurred: 布尔值,指示此尝试是否遇到可直接转换为SSE错误事件的API错误
278
+ requests.RequestException (如超时) 会被直接抛出。
279
  """
280
+ url = f"{ONDEMAND_API_BASE}/sessions/{session_id_for_attempt}/query"
281
  payload = {
282
  "query": query_str,
283
  "endpointId": endpoint_id,
 
285
  "responseMode": "stream"
286
  }
287
  headers = {
288
+ "apikey": apikey_for_attempt,
289
  "Content-Type": "application/json",
290
  "Accept": "text/event-stream"
291
  }
292
 
293
  generated_sse_strings = []
294
  accumulated_text_parts = []
295
+ api_error_handled_as_sse = False
296
 
297
+ logging.info(f"【流式请求子尝试 {current_attempt_num_logging}】发送到 OnDemand: Session={session_id_for_attempt}, Endpoint={endpoint_id}, Key={keymgr.display_key(apikey_for_attempt)}")
298
 
299
  try:
300
  with requests.post(url, json=payload, headers=headers, stream=True, timeout=180) as resp:
301
  if resp.status_code != 200:
302
  api_error_handled_as_sse = True
303
  error_text = resp.text
304
+ logging.error(f"【OnDemand流错误】请求失败 (子尝试 {current_attempt_num_logging})。状态码: {resp.status_code}, Session: {session_id_for_attempt}, 响应: {error_text[:500]}")
305
  error_payload = {
306
  "error": {
307
+ "message": f"OnDemand API Error (Stream Init, Attempt {current_attempt_num_logging}): {resp.status_code} - {error_text[:200]}",
308
  "type": "on_demand_api_error",
309
  "code": resp.status_code
310
  }
 
314
  return generated_sse_strings, "".join(accumulated_text_parts), api_error_handled_as_sse
315
 
316
  first_chunk_sent = False
317
+ last_line_str = ""
318
  for line_bytes in resp.iter_lines():
319
  if not line_bytes:
320
  continue
321
 
322
  line_str = line_bytes.decode("utf-8")
323
+ last_line_str = line_str
324
 
325
  if line_str.startswith("data:"):
326
  data_part = line_str[len("data:"):].strip()
327
 
328
  if data_part == "[DONE]":
329
+ logging.info(f"【OnDemand流】接收到 [DONE] 信号 (子尝试 {current_attempt_num_logging})。Session: {session_id_for_attempt}")
330
  generated_sse_strings.append("data: [DONE]\n\n")
331
  break
332
  elif data_part.startswith("[ERROR]:"):
333
+ api_error_handled_as_sse = True
334
  error_json_str = data_part[len("[ERROR]:"):].strip()
335
+ logging.warning(f"【OnDemand流】接收到错误事件 (子尝试 {current_attempt_num_logging}): {error_json_str}。Session: {session_id_for_attempt}")
336
  try:
337
  error_obj = json.loads(error_json_str)
338
  except json.JSONDecodeError:
339
  error_obj = {"message": error_json_str, "type": "on_demand_stream_error_format"}
340
  generated_sse_strings.append(format_openai_sse_delta({"error": error_obj}))
341
+ generated_sse_strings.append("data: [DONE]\n\n")
342
  break
343
  else:
344
  try:
 
357
  choice_delta["content"] = delta_content
358
 
359
  if not choice_delta.get("content") and not choice_delta.get("role"):
 
360
  if not (choice_delta.get("role") and not choice_delta.get("content")):
361
  continue
362
 
 
365
  "object": "chat.completion.chunk",
366
  "created": int(time.time()),
367
  "model": openai_model_name_for_response,
368
+ "choices": [{"delta": choice_delta, "index": 0, "finish_reason": None}]
 
 
 
 
369
  }
370
  generated_sse_strings.append(format_openai_sse_delta(openai_chunk))
371
  except json.JSONDecodeError:
372
+ logging.warning(f"【OnDemand流】无法解析JSON (子尝试 {current_attempt_num_logging}): {data_part[:100]}... Session: {session_id_for_attempt}")
 
 
373
  continue
374
 
 
375
  if not last_line_str.startswith("data: [DONE]") and not api_error_handled_as_sse:
376
+ logging.info(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 流迭代完成,补充发送 [DONE]。Session: {session_id_for_attempt}")
377
  generated_sse_strings.append("data: [DONE]\n\n")
378
 
379
+ except requests.exceptions.RequestException: # Let specific RequestExceptions be handled by the caller
380
+ raise
381
+ except Exception as e: # Catch other unexpected errors during stream processing
382
+ api_error_handled_as_sse = True
383
+ logging.error(f"【OnDemand流】处理流时发生未知错误 (子尝试 {current_attempt_num_logging}): {e}, Session: {session_id_for_attempt}", exc_info=True)
 
 
 
384
  error_payload = {
385
+ "error": {"message": f"Unknown error during streaming (Attempt {current_attempt_num_logging}): {str(e)}", "type": "unknown_streaming_error_in_attempt"}
 
 
 
386
  }
387
  generated_sse_strings.append(format_openai_sse_delta(error_payload))
388
  generated_sse_strings.append("data: [DONE]\n\n")
 
389
 
390
  return generated_sse_strings, "".join(accumulated_text_parts).strip(), api_error_handled_as_sse
391
 
 
392
  @app.route("/v1/chat/completions", methods=["POST"])
393
  def chat_completions():
394
  """处理聊天补全请求,模拟 OpenAI /v1/chat/completions 接口"""
 
405
  if not isinstance(messages, list) or not messages:
406
  return jsonify({"error": "'messages' must be a non-empty list."}), 400
407
 
408
+ openai_model_name = request_data.get("model", "gpt-4o")
409
  target_endpoint_id = get_endpoint_id(openai_model_name)
410
  is_stream_request = bool(request_data.get("stream", False))
411
 
 
412
  formatted_query_parts = []
413
  for msg in messages:
414
  role = msg.get("role", "user").strip().capitalize()
415
+ content = msg.get("content", "")
416
  content_string = ""
417
+ if isinstance(content, list):
 
 
418
  temp_parts = []
419
  for item in content:
420
  if isinstance(item, dict) and item.get("type") == "text":
421
  temp_parts.append(item.get("text", ""))
422
+ elif isinstance(item, dict):
423
+ for k, v_item in item.items():
424
+ content_string += f"{k}: {v_item}\n{k}: {v_item}"
425
+ if not content_string and temp_parts:
 
 
 
426
  content_string = "\n".join(filter(None, temp_parts))
 
427
  elif isinstance(content, str):
428
  content_string = content
429
 
430
  content_string = content_string.strip()
431
+ if not content_string:
432
  continue
433
+ formatted_query_parts.append(f"<|{role}|>: {content_string}")
434
 
435
  if not formatted_query_parts:
436
  return jsonify({"error": "No valid content found in 'messages'."}), 400
437
 
438
+ # Use the globally defined CLAUDE_SYSTEM_PROMPT
439
+ start_prompt = CLAUDE_SYSTEM_PROMPT + "\n\n" + """下面是对话历史. 你是Assitant角色,请遵从User指令,并用中文尽可能详细的回复。注意,请直接回复! 请不要在开头提出"根据上下文及历史记录"相关的话语。\n"""
440
  final_query_to_ondemand = start_prompt + "\n".join(formatted_query_parts)
441
 
442
+ def attempt_ondemand_request_wrapper(current_apikey_from_wrapper, current_session_id_from_wrapper):
443
+ # This inner function is what with_valid_key_and_session calls.
444
+ # It receives the *initial* apikey and session_id.
 
445
  if is_stream_request:
446
+ return handle_stream_request(current_apikey_from_wrapper, current_session_id_from_wrapper, final_query_to_ondemand, target_endpoint_id, openai_model_name)
447
  else:
448
+ return handle_non_stream_request(current_apikey_from_wrapper, current_session_id_from_wrapper, final_query_to_ondemand, target_endpoint_id, openai_model_name)
449
 
450
+ def with_valid_key_and_session(action_func_to_wrap):
451
+ # This is the outer retry loop for API key/session issues for the *first* attempt of action_func_to_wrap
452
+ max_key_retries = len(ONDEMAND_APIKEYS) * 2 if ONDEMAND_APIKEYS else 1
453
+ key_retry_count = 0
454
+ last_exception_for_key_retry = None
455
 
456
+ while key_retry_count < max_key_retries:
457
+ key_retry_count += 1
458
+ selected_apikey_for_outer_retry = None
459
  try:
460
+ selected_apikey_for_outer_retry = keymgr.get()
461
+ logging.info(f"【请求处理 - Key轮换尝试 {key_retry_count}/{max_key_retries}】使用 API Key: {keymgr.display_key(selected_apikey_for_outer_retry)},准备创建新会话...")
462
+ ondemand_session_id_for_outer_retry = create_session(selected_apikey_for_outer_retry)
463
+
464
+ # Call the action_func_to_wrap (which is attempt_ondemand_request_wrapper)
465
+ # This action_func_to_wrap will then call handle_stream_request or handle_non_stream_request
466
+ # If handle_..._request fails its *first* attempt due to RequestException, it re-raises it here.
467
+ return action_func_to_wrap(selected_apikey_for_outer_retry, ondemand_session_id_for_outer_retry)
468
+
469
+ except ValueError as ve: # keymgr.get() failed
470
+ logging.critical(f"【请求处理 - Key轮换尝试 {key_retry_count}】KeyManager 错误: {ve}")
471
+ last_exception_for_key_retry = ve
472
+ break # Cannot get any key, fatal for this request.
473
+ except requests.exceptions.RequestException as http_err_outer: # Covers create_session failure or re-raised error from action_func's first attempt
474
+ last_exception_for_key_retry = http_err_outer
475
+ logging.warning(f"【请求处理 - Key轮换尝试 {key_retry_count}】HTTP/请求错误。Key: {keymgr.display_key(selected_apikey_for_outer_retry) if selected_apikey_for_outer_retry else 'N/A'}, Error: {http_err_outer}")
476
+ if selected_apikey_for_outer_retry: # If a key was involved in this failure
477
+ keymgr.mark_bad(selected_apikey_for_outer_retry)
478
+
479
+ if key_retry_count >= max_key_retries:
480
+ logging.error(f"【请求处理】所有Key轮换尝试均失败。最后错误: {last_exception_for_key_retry}")
481
+ break # Exhausted key retries
482
+
483
+ logging.info(f"【请求处理】Key轮换尝试 {key_retry_count} 失败,等待后重试下一个Key...")
484
  time.sleep(1)
485
+ continue # To the next iteration of the key_retry_count loop
486
+ except Exception as e_outer: # Other unexpected errors during the initial setup/call
487
+ last_exception_for_key_retry = e_outer
488
+ logging.error(f"【请求处理 - Key轮换尝试 {key_retry_count}】发生意外严重错误: {e_outer}", exc_info=True)
489
+ if selected_apikey_for_outer_retry:
490
+ keymgr.mark_bad(selected_apikey_for_outer_retry)
491
+ # For truly unexpected errors, might be better to fail fast
492
+ break # Break outer retry loop
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
493
 
494
+ error_message = "All attempts to process the request failed after multiple key/session retries."
495
+ if last_exception_for_key_retry:
496
+ error_message += f" Last known error during key/session phase: {str(last_exception_for_key_retry)}"
497
  logging.error(error_message)
498
+ return jsonify({"error": error_message}), 503
499
 
500
+ return with_valid_key_and_session(attempt_ondemand_request_wrapper)
501
 
502
 
503
def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoint_id, openai_model_name_for_response):
    """Handle a streaming chat-completion request with empty-response retries.

    The first attempt uses the caller-supplied ``initial_apikey`` /
    ``initial_session_id``.  Every subsequent empty-response retry acquires a
    brand-new key/session pair via ``keymgr.get()`` / ``create_session()``.
    A request-level failure on the *initial* attempt is re-raised so the outer
    key-retry mechanism (which supplied the initial key) can handle it.

    Parameters mirror the OnDemand query API: ``query_str`` is the prompt,
    ``endpoint_id`` selects the backing model, and
    ``openai_model_name_for_response`` is echoed back in the OpenAI-shaped
    SSE payloads.

    Returns a Flask ``Response`` (``text/event-stream``) that replays the
    SSE event strings collected from the winning attempt.
    """
    max_empty_response_retries = 5
    empty_retry_attempt_num = 0  # counts total attempts, including the initial one

    current_apikey_for_attempt = initial_apikey
    current_session_id_for_attempt = initial_session_id

    final_sse_strings_to_yield = []

    while empty_retry_attempt_num < max_empty_response_retries:
        empty_retry_attempt_num += 1

        if empty_retry_attempt_num > 1:  # empty-response retry: fetch a fresh key/session
            logging.info(f"【流式请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session...")
            try:
                current_apikey_for_attempt = keymgr.get()
                current_session_id_for_attempt = create_session(current_apikey_for_attempt)
                logging.info(f"【流式请求-空回复重试 {empty_retry_attempt_num-1}】新Key/Session获取成功: Key={keymgr.display_key(current_apikey_for_attempt)}, Session={current_session_id_for_attempt}")
            except (ValueError, requests.exceptions.RequestException) as e_key_session:
                logging.warning(f"【流式请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session失败: {e_key_session}")
                # A ValueError means keymgr.get() itself failed (no key was obtained);
                # otherwise a key was obtained but create_session failed, so mark it bad.
                if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
                    keymgr.mark_bad(current_apikey_for_attempt)
                if empty_retry_attempt_num >= max_empty_response_retries:
                    final_sse_strings_to_yield = [
                        format_openai_sse_delta({"error": {"message": f"Failed to get new key/session for final empty stream retry. Error: {str(e_key_session)}", "type": "internal_proxy_error"}}),
                        "data: [DONE]\n\n"
                    ]
                    break  # retries exhausted: surface an SSE error payload
                time.sleep(1)
                current_apikey_for_attempt = None  # reset in case keymgr.get() failed
                continue  # next empty-response retry iteration

        # Label this attempt for log lines (initial attempt vs nth empty-response retry).
        log_attempt_str = "初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"

        try:
            sse_strings_this_attempt, accumulated_text_this_attempt, api_error_in_attempt = \
                _execute_one_stream_attempt(current_apikey_for_attempt, current_session_id_for_attempt, query_str, endpoint_id, openai_model_name_for_response, f"{log_attempt_str} (Overall attempt {empty_retry_attempt_num})")

            final_sse_strings_to_yield = sse_strings_this_attempt

            if api_error_in_attempt:
                # The attempt already converted the upstream error into SSE events;
                # pass them through as-is instead of retrying.
                logging.warning(f"【流式请求】({log_attempt_str}) OnDemand 服务返回错误或处理内部错误,将返回此错误信息。")
                break

            if accumulated_text_this_attempt:
                logging.info(f"【流式请求】({log_attempt_str}) 成功获取非空内容。")
                break

            logging.warning(f"【流式请求】({log_attempt_str}) 返回空内容。")
            if empty_retry_attempt_num >= max_empty_response_retries:
                logging.error(f"【流式请求】达到最大空回复重试次数 ({max_empty_response_retries})。")
                empty_error_payload = {
                    "error": {"message": f"Model returned an empty stream after {max_empty_response_retries} retries.", "type": "empty_stream_error_after_retries", "code": "empty_response"}
                }
                final_sse_strings_to_yield = [format_openai_sse_delta(empty_error_payload), "data: [DONE]\n\n"]
                break

            logging.info(f"【流式请求】空回复,将在1秒后重试下一个Key。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
            time.sleep(1)

        except requests.exceptions.RequestException as e_req:
            logging.warning(f"【流式请求】({log_attempt_str} using key {keymgr.display_key(current_apikey_for_attempt)}) 发生请求级错误: {e_req}")
            keymgr.mark_bad(current_apikey_for_attempt)
            if empty_retry_attempt_num == 1:
                # The initial key/session came from with_valid_key_and_session:
                # re-raise (bare, to preserve the traceback) so the outer
                # key-retry mechanism can swap the key.
                raise

            # A RequestException during an empty-response retry (attempt > 1).
            if empty_retry_attempt_num >= max_empty_response_retries:
                logging.error("【流式请求】在最后一次空回复重试时发生请求错误。")
                final_sse_strings_to_yield = [
                    format_openai_sse_delta({"error": {"message": f"Request failed on final empty stream retry attempt: {str(e_req)}", "type": "internal_proxy_error"}}),
                    "data: [DONE]\n\n"
                ]
                break  # retries exhausted
            time.sleep(1)
            # Loop continues: the next iteration acquires another fresh key/session.
            continue

    def final_generator_for_response():
        # Replay the SSE events collected by the winning (or final failed) attempt.
        if not final_sse_strings_to_yield:
            logging.error("【流式请求】final_sse_strings_to_yield 为空,返回通用错误。")
            yield format_openai_sse_delta({"error": {"message": "Unexpected empty result in streaming.", "type": "internal_proxy_error"}})
            yield "data: [DONE]\n\n"
        else:
            # NOTE(review): this else-branch body was elided in the diff view;
            # replaying the collected event strings is the only behavior
            # consistent with how final_sse_strings_to_yield is built above.
            yield from final_sse_strings_to_yield

    return Response(final_generator_for_response(), content_type='text/event-stream')
593
 
594
def handle_non_stream_request(initial_apikey, initial_session_id, query_str, endpoint_id, openai_model_name_for_response):
    """Handle a non-streaming (sync) chat-completion request with empty-response retries.

    The first attempt uses the caller-supplied ``initial_apikey`` /
    ``initial_session_id``; each empty-response retry acquires a fresh
    key/session pair.  A request-level failure on the *initial* attempt is
    re-raised so the outer key-retry mechanism can handle it.

    Returns a Flask JSON response: an OpenAI-shaped chat.completion object on
    success, or an error payload with an appropriate HTTP status
    (500 empty-after-retries, 502 parse error, 503 upstream/key failure).
    """
    max_empty_response_retries = 5
    empty_retry_attempt_num = 0  # counts total attempts, including the initial one

    current_apikey_for_attempt = initial_apikey
    current_session_id_for_attempt = initial_session_id

    # Query URL is tied to the session; it is rebuilt whenever the session changes.
    url = f"{ONDEMAND_API_BASE}/sessions/{current_session_id_for_attempt}/query"

    while empty_retry_attempt_num < max_empty_response_retries:
        empty_retry_attempt_num += 1

        if empty_retry_attempt_num > 1:  # empty-response retry: fetch a fresh key/session
            logging.info(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session...")
            try:
                current_apikey_for_attempt = keymgr.get()
                current_session_id_for_attempt = create_session(current_apikey_for_attempt)
                url = f"{ONDEMAND_API_BASE}/sessions/{current_session_id_for_attempt}/query"  # update URL for the new session
                logging.info(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】新Key/Session获取成功: Key={keymgr.display_key(current_apikey_for_attempt)}, Session={current_session_id_for_attempt}")
            except (ValueError, requests.exceptions.RequestException) as e_key_session:
                logging.warning(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session失败: {e_key_session}")
                # A ValueError means keymgr.get() failed (no key obtained); otherwise
                # create_session failed on a freshly obtained key, so mark it bad.
                if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
                    keymgr.mark_bad(current_apikey_for_attempt)
                if empty_retry_attempt_num >= max_empty_response_retries:
                    return jsonify({"error": f"Failed to get new key/session for final empty response retry. Error: {str(e_key_session)}"}), 503
                time.sleep(1)
                current_apikey_for_attempt = None  # reset in case keymgr.get() failed
                continue

        # Label this attempt for log lines (initial attempt vs nth empty-response retry).
        log_attempt_str = "初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"
        logging.info(f"【同步请求】({log_attempt_str}, 总尝试 {empty_retry_attempt_num}) Session={current_session_id_for_attempt}, Key={keymgr.display_key(current_apikey_for_attempt)}")

        payload = {"query": query_str, "endpointId": endpoint_id, "pluginIds": [], "responseMode": "sync"}
        headers = {"apikey": current_apikey_for_attempt, "Content-Type": "application/json"}

        try:
            resp = requests.post(url, json=payload, headers=headers, timeout=120)
            resp.raise_for_status()

            response_json = resp.json()
            if "data" not in response_json or "answer" not in response_json["data"]:
                logging.error(f"【OnDemand同步错误】响应格式不符合预期 ({log_attempt_str})。Session: {current_session_id_for_attempt}, 响应: {str(response_json)[:500]}")
                # Handled by the (ValueError, ...) clause below -> 502 to the client.
                raise ValueError("OnDemand API sync response missing 'data.answer' field.")

            ai_response_content = response_json["data"]["answer"]
            if ai_response_content is None:
                ai_response_content = ""  # normalize explicit null answers to empty

            if ai_response_content.strip():
                logging.info(f"【同步请求】({log_attempt_str}) 成功获取非空内容。")
                openai_response_obj = {
                    "id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
                    "model": openai_model_name_for_response,
                    "choices": [{"index": 0, "message": {"role": "assistant", "content": ai_response_content}, "finish_reason": "stop"}],
                    "usage": {}
                }
                return jsonify(openai_response_obj)
            else:
                logging.warning(f"【同步请求】({log_attempt_str}) 返回空回复。")
                if empty_retry_attempt_num >= max_empty_response_retries:
                    logging.error(f"【同步请求】达到最大空回复重试次数 ({max_empty_response_retries})。")
                    return jsonify({
                        "error": f"Model returned an empty response after {max_empty_response_retries} retries.",
                        "id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
                        "model": openai_model_name_for_response,
                        "choices": [{"index": 0, "message": {"role": "assistant", "content": ""}, "finish_reason": "length"}],
                        "usage": {}
                    }), 500
                logging.info(f"【同步请求】空回复,将在1秒后重试下一个Key。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
                time.sleep(1)

        except requests.exceptions.RequestException as e_req:
            logging.warning(f"【同步请求】({log_attempt_str} using key {keymgr.display_key(current_apikey_for_attempt)}) 发生请求级错误: {e_req}")
            keymgr.mark_bad(current_apikey_for_attempt)
            if empty_retry_attempt_num == 1:
                # Initial key came from the outer key-retry mechanism:
                # re-raise (bare, to preserve the traceback) and let it swap the key.
                raise
            if empty_retry_attempt_num >= max_empty_response_retries:
                logging.error("【同步请求】在最后一次空回复重试时发生请求错误。")
                return jsonify({"error": f"Request failed on final empty response retry attempt. Last error: {str(e_req)}"}), 503
            time.sleep(1)
            continue
        except (ValueError, KeyError, json.JSONDecodeError) as e_parse:
            # Malformed upstream payload: do not retry other keys, report upstream error.
            logging.error(f"【同步请求】({log_attempt_str}) 处理响应时出错: {e_parse}", exc_info=True)
            return jsonify({"error": f"Error processing OnDemand sync response: {str(e_parse)}"}), 502

    # Defensive: every loop path either returns or retries, so this should be unreachable.
    logging.error("【同步请求】意外退出空回复重试循环。")
    return jsonify({"error": "Unexpected error in non-stream handling after empty response retries."}), 500
681
 
682
 
 
716
 
717
  if __name__ == "__main__":
718
  log_format = '[%(asctime)s] %(levelname)s in %(module)s (%(funcName)s): %(message)s'
719
+ # Use LOG_LEVEL from env if set, otherwise default to INFO
720
  logging.basicConfig(level=os.environ.get("LOG_LEVEL", "INFO").upper(), format=log_format)
721
 
722
  if not PRIVATE_KEY: