dede

Sleeping

App Files Community

Ge-AI committed on May 19, 2025

Commit

6de98b8

verified ·

1 Parent(s): 9b8516b

Update openai_ondemand_adapter.py

Browse files

Files changed (1) hide show

openai_ondemand_adapter.py +291 -209

openai_ondemand_adapter.py CHANGED Viewed

@@ -238,13 +238,7 @@ def create_session(apikey, external_user_id=None, plugin_ids=None):
 def format_openai_sse_delta(chunk_data_dict):
     return f"data: {json.dumps(chunk_data_dict, ensure_ascii=False)}\n\n"
-# Modified: This function is now a generator and returns accumulated text + error flag at the end.
 def _execute_one_stream_attempt(apikey_for_attempt, session_id_for_attempt, query_str, endpoint_id, openai_model_name_for_response, current_attempt_num_logging):
-    """
-    Executes one streaming request attempt.
-    Yields SSE event strings.
-    Returns a tuple: (accumulated_text_content, api_error_yielded_flag)
-    """
     url = f"{ONDEMAND_API_BASE}/sessions/{session_id_for_attempt}/query"
     payload = {
         "query": query_str,
@@ -259,113 +253,125 @@ def _execute_one_stream_attempt(apikey_for_attempt, session_id_for_attempt, quer
     }
     accumulated_text_parts = []
-    api_error_yielded = False # Flag to indicate if an API error was processed and yielded as SSE
-    logging.info(f"【流式请求子尝试 {current_attempt_num_logging}】发送到 OnDemand: Session={session_id_for_attempt}, Endpoint={endpoint_id}, Key={keymgr.display_key(apikey_for_attempt)}")
-    try:
-        with requests.post(url, json=payload, headers=headers, stream=True, timeout=180) as resp:
-            if resp.status_code != 200:
-                api_error_yielded = True
-                error_text = resp.text
-                logging.error(f"【OnDemand流错误】请求失败 (子尝试 {current_attempt_num_logging})。状态码: {resp.status_code}, Session: {session_id_for_attempt}, 响应: {error_text[:500]}")
-                error_payload = {
-                    "error": {
-                        "message": f"OnDemand API Error (Stream Init, Attempt {current_attempt_num_logging}): {resp.status_code} - {error_text[:200]}",
-                        "type": "on_demand_api_error",
-                        "code": resp.status_code
-                    }
-                }
-                yield format_openai_sse_delta(error_payload)
-                yield "data: [DONE]\n\n"
-                return "".join(accumulated_text_parts).strip(), api_error_yielded # Return after yielding error
-            first_chunk_sent = False
-            last_line_str = ""
-            for line_bytes in resp.iter_lines():
-                if not line_bytes:
-                    continue
-                line_str = line_bytes.decode("utf-8")
-                last_line_str = line_str
-                if line_str.startswith("data:"):
-                    data_part = line_str[len("data:"):].strip()
-                    if data_part == "[DONE]":
-                        logging.info(f"【OnDemand流】接收到 [DONE] 信号 (子尝试 {current_attempt_num_logging})。Session: {session_id_for_attempt}")
                         yield "data: [DONE]\n\n"
-                        # No break here, let the function return after the loop
                         return "".join(accumulated_text_parts).strip(), api_error_yielded
-                    elif data_part.startswith("[ERROR]:"):
-                        api_error_yielded = True
-                        error_json_str = data_part[len("[ERROR]:"):].strip()
-                        logging.warning(f"【OnDemand流】接收到错误事件 (子尝试 {current_attempt_num_logging}): {error_json_str}。Session: {session_id_for_attempt}")
-                        try:
-                            error_obj = json.loads(error_json_str)
-                        except json.JSONDecodeError:
-                            error_obj = {"message": error_json_str, "type": "on_demand_stream_error_format"}
-                        yield format_openai_sse_delta({"error": error_obj})
-                        yield "data: [DONE]\n\n"
-                        return "".join(accumulated_text_parts).strip(), api_error_yielded # Return after yielding error
-                    else:
-                        try:
-                            event_data = json.loads(data_part)
-                            if event_data.get("eventType") == "fulfillment":
-                                delta_content = event_data.get("answer", "")
-                                if delta_content is None: delta_content = ""
-                                accumulated_text_parts.append(delta_content)
-                                choice_delta = {}
-                                if not first_chunk_sent:
-                                    choice_delta["role"] = "assistant"
-                                    choice_delta["content"] = delta_content
-                                    first_chunk_sent = True
-                                else:
-                                    choice_delta["content"] = delta_content
-                                if not choice_delta.get("content") and not choice_delta.get("role"):
-                                    if not (choice_delta.get("role") and not choice_delta.get("content")):
-                                        continue
-                                openai_chunk = {
-                                    "id": "chatcmpl-" + str(uuid.uuid4())[:12],
-                                    "object": "chat.completion.chunk",
-                                    "created": int(time.time()),
-                                    "model": openai_model_name_for_response,
-                                    "choices": [{"delta": choice_delta, "index": 0, "finish_reason": None}]
-                                }
-                                yield format_openai_sse_delta(openai_chunk) # Yield immediately
-                        except json.JSONDecodeError:
-                            logging.warning(f"【OnDemand流】无法解析JSON (子尝试 {current_attempt_num_logging}): {data_part[:100]}... Session: {session_id_for_attempt}")
-                            continue
-            # This part is reached if the loop finishes without an explicit [DONE] or [ERROR] from the stream data itself.
-            # This might happen if the stream just ends.
-            if not api_error_yielded: # If no error was yielded, and no [DONE] was in data, yield a [DONE]
-                if not last_line_str.startswith("data: [DONE]"): # Check if last processed line was not already DONE
                      logging.info(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 流迭代完成，补充发送 [DONE]。Session: {session_id_for_attempt}")
                      yield "data: [DONE]\n\n"
-    except requests.exceptions.RequestException:
-        # Network/request level errors before or during streaming
-        # These should be caught by the caller (handle_stream_request) to decide on retries
-        logging.error(f"【OnDemand流】请求过程中发生网络或请求异常 (子尝试 {current_attempt_num_logging}): Session: {session_id_for_attempt}", exc_info=False)
-        raise # Re-raise for the caller to handle
-    except Exception as e:
-        # Unexpected Python errors during stream processing
-        logging.error(f"【OnDemand流】处理流时发生未知错误 (子尝试 {current_attempt_num_logging}): {e}, Session: {session_id_for_attempt}", exc_info=True)
-        api_error_yielded = True # Mark that an error occurred and we are yielding an SSE for it
-        error_payload = {
-            "error": {"message": f"Unknown error during streaming (Attempt {current_attempt_num_logging}): {str(e)}", "type": "unknown_streaming_error_in_attempt"}
-        }
-        yield format_openai_sse_delta(error_payload)
-        yield "data: [DONE]\n\n"
-    return "".join(accumulated_text_parts).strip(), api_error_yielded
 @app.route("/v1/chat/completions", methods=["POST"])
@@ -418,7 +424,6 @@ def chat_completions():
     def attempt_ondemand_request_wrapper(current_apikey_from_wrapper, current_session_id_from_wrapper):
         if is_stream_request:
-            # Pass the generator directly to Response
             return Response(
                 handle_stream_request(current_apikey_from_wrapper, current_session_id_from_wrapper, final_query_to_ondemand, target_endpoint_id, openai_model_name),
                 content_type='text/event-stream'
@@ -446,10 +451,22 @@ def chat_completions():
                 break
             except requests.exceptions.RequestException as http_err_outer:
                 last_exception_for_key_retry = http_err_outer
-                logging.warning(f"【请求处理 - Key轮换尝试 {key_retry_count}】HTTP/请求错误。Key: {keymgr.display_key(selected_apikey_for_outer_retry) if selected_apikey_for_outer_retry else 'N/A'}, Error: {http_err_outer}")
-                if selected_apikey_for_outer_retry:
-                    keymgr.mark_bad(selected_apikey_for_outer_retry)
                 if key_retry_count >= max_key_retries:
                     logging.error(f"【请求处理】所有Key轮换尝试均失败。最后错误: {last_exception_for_key_retry}")
                     break
@@ -460,36 +477,26 @@ def chat_completions():
             except Exception as e_outer:
                 last_exception_for_key_retry = e_outer
                 logging.error(f"【请求处理 - Key轮换尝试 {key_retry_count}】发生意外严重错误: {e_outer}", exc_info=True)
-                if selected_apikey_for_outer_retry:
                     keymgr.mark_bad(selected_apikey_for_outer_retry)
                 break
-        error_message = "All attempts to process the request failed after multiple key/session retries."
-        if last_exception_for_key_retry:
-            error_message += f" Last known error during key/session phase: {str(last_exception_for_key_retry)}"
-        logging.error(error_message)
-        # For stream requests, if with_valid_key_and_session fails, we can't return jsonify directly
-        # This part might need adjustment if the action_func_to_wrap for stream is expected to return a Response object
-        # However, if action_func_to_wrap (attempt_ondemand_request_wrapper) for stream returns a Response,
-        # then this jsonify will only be hit if create_session or keymgr.get fails repeatedly.
         if is_stream_request:
-             # Construct a generator that yields an error SSE
             def error_stream_gen():
-                yield format_openai_sse_delta({"error": {"message": error_message, "type": "proxy_setup_error", "code": 503}})
                 yield "data: [DONE]\n\n"
-            return Response(error_stream_gen(), content_type='text/event-stream', status=503)
         else:
-            return jsonify({"error": error_message}), 503
     return with_valid_key_and_session(attempt_ondemand_request_wrapper)
-# Modified: This function is now a generator that uses `yield from`
 def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoint_id, openai_model_name_for_response):
-    """
-    Handles streaming chat completion requests with empty response retries.
-    Each empty response retry uses a new API key and session.
-    Yields SSE event strings directly.
-    """
     max_empty_response_retries = 5
     empty_retry_attempt_num = 0
@@ -499,7 +506,7 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
     while empty_retry_attempt_num < max_empty_response_retries:
         empty_retry_attempt_num += 1
         accumulated_text_this_attempt = ""
-        api_error_in_attempt = False
         if empty_retry_attempt_num > 1:
             logging.info(f"【流式请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session...")
@@ -509,10 +516,23 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
                 logging.info(f"【流式请求-空回复重试 {empty_retry_attempt_num-1}】新Key/Session获取成功: Key={keymgr.display_key(current_apikey_for_attempt)}, Session={current_session_id_for_attempt}")
             except (ValueError, requests.exceptions.RequestException) as e_key_session:
                 logging.warning(f"【流式请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session失败: {e_key_session}")
                 if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
-                    keymgr.mark_bad(current_apikey_for_attempt)
                 if empty_retry_attempt_num >= max_empty_response_retries:
-                    yield format_openai_sse_delta({"error": {"message": f"Failed to get new key/session for final empty stream retry. Error: {str(e_key_session)}", "type": "internal_proxy_error"}})
                     yield "data: [DONE]\n\n"
                     return
                 time.sleep(1)
@@ -522,8 +542,6 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
         log_attempt_str = f"初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"
         try:
-            # Yield from the sub-generator; result_tuple will be (accumulated_text, api_error_yielded_flag)
-            # This is where the true streaming to the client happens chunk by chunk.
             result_tuple = yield from _execute_one_stream_attempt(
                 current_apikey_for_attempt,
                 current_session_id_for_attempt,
@@ -533,64 +551,74 @@ def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoin
                 f"{log_attempt_str} (Overall attempt {empty_retry_attempt_num})"
             )
             accumulated_text_this_attempt = result_tuple[0]
-            api_error_in_attempt = result_tuple[1]
-        except requests.exceptions.RequestException as e_req:
-            logging.warning(f"【流式请求】({log_attempt_str} using key {keymgr.display_key(current_apikey_for_attempt)}) 发生请求级错误: {e_req}")
-            if current_apikey_for_attempt: # Ensure key is marked bad if one was used
-                 keymgr.mark_bad(current_apikey_for_attempt)
             if empty_retry_attempt_num == 1:
-                # This was the initial_apikey. Re-raise for the outer key retry mechanism in with_valid_key_and_session.
-                # The with_valid_key_and_session will then try a new key for the *entire* operation.
                 raise e_req
-            # If it's an empty-response retry (attempt_num > 1) that failed with RequestException
             if empty_retry_attempt_num >= max_empty_response_retries:
-                logging.error(f"【流式请求】在最后一次空回复重试时发生请求错误。")
-                yield format_openai_sse_delta({"error": {"message": f"Request failed on final empty stream retry attempt: {str(e_req)}", "type": "internal_proxy_error"}})
                 yield "data: [DONE]\n\n"
                 return
             time.sleep(1)
-            continue # To the next iteration of the empty_retry_attempt_num loop (will try new key/session)
-        # Check results after _execute_one_stream_attempt has finished for this attempt
         if api_error_in_attempt:
-            logging.warning(f"【流式请求】({log_attempt_str}) OnDemand 服务返回错误或处理内部错误，已将错误信息流式传输。")
-            # Error already yielded by _execute_one_stream_attempt, so we just stop.
             return
         if accumulated_text_this_attempt:
             logging.info(f"【流式请求】({log_attempt_str}) 成功获取非空内容。")
-            # Content already yielded by _execute_one_stream_attempt. We are done.
             return
-        # If we reach here, content was empty and no API error was yielded by _execute_one_stream_attempt
         logging.warning(f"【流式请求】({log_attempt_str}) 返回空内容。")
         if empty_retry_attempt_num >= max_empty_response_retries:
-            logging.error(f"【流式请求】达到最大空回复重试次数 ({max_empty_response_retries})。")
             yield format_openai_sse_delta({
-                "error": {"message": f"Model returned an empty stream after {max_empty_response_retries} retries.", "type": "empty_stream_error_after_retries", "code": "empty_response"}
             })
             yield "data: [DONE]\n\n"
             return
         logging.info(f"【流式请求】空回复，将在1秒后重试下一个Key。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
         time.sleep(1)
-    # Fallback if loop finishes unexpectedly (shouldn't happen with current logic)
-    logging.error("【流式请求】意外退出空回复重试循环。")
-    yield format_openai_sse_delta({"error": {"message": "Unexpected error in stream handling.", "type": "internal_proxy_error"}})
     yield "data: [DONE]\n\n"
 def handle_non_stream_request(initial_apikey, initial_session_id, query_str, endpoint_id, openai_model_name_for_response):
-    max_empty_response_retries = 5
     empty_retry_attempt_num = 0
     current_apikey_for_attempt = initial_apikey
     current_session_id_for_attempt = initial_session_id
-    url = f"{ONDEMAND_API_BASE}/sessions/{current_session_id_for_attempt}/query"
     while empty_retry_attempt_num < max_empty_response_retries:
         empty_retry_attempt_num += 1
@@ -600,76 +628,130 @@ def handle_non_stream_request(initial_apikey, initial_session_id, query_str, end
             try:
                 current_apikey_for_attempt = keymgr.get()
                 current_session_id_for_attempt = create_session(current_apikey_for_attempt)
-                url = f"{ONDEMAND_API_BASE}/sessions/{current_session_id_for_attempt}/query"
                 logging.info(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】新Key/Session获取成功: Key={keymgr.display_key(current_apikey_for_attempt)}, Session={current_session_id_for_attempt}")
             except (ValueError, requests.exceptions.RequestException) as e_key_session:
                 logging.warning(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session失败: {e_key_session}")
                 if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
-                    keymgr.mark_bad(current_apikey_for_attempt)
                 if empty_retry_attempt_num >= max_empty_response_retries:
-                    return jsonify({"error": f"Failed to get new key/session for final empty response retry. Error: {str(e_key_session)}"}), 503
                 time.sleep(1)
                 current_apikey_for_attempt = None
                 continue
         log_attempt_str = f"初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"
-        logging.info(f"【同步请求】({log_attempt_str}, 总尝试 {empty_retry_attempt_num}) Session={current_session_id_for_attempt}, Key={keymgr.display_key(current_apikey_for_attempt)}")
-        payload = { "query": query_str, "endpointId": endpoint_id, "pluginIds": [], "responseMode": "sync" }
-        headers = {"apikey": current_apikey_for_attempt, "Content-Type": "application/json"}
-        try:
-            resp = requests.post(url, json=payload, headers=headers, timeout=120)
-            resp.raise_for_status()
-            response_json = resp.json()
-            if "data" not in response_json or "answer" not in response_json["data"]:
-                logging.error(f"【OnDemand同步错误】响应格式不符合预期 ({log_attempt_str})。Session: {current_session_id_for_attempt}, 响应: {str(response_json)[:500]}")
-                raise ValueError("OnDemand API sync response missing 'data.answer' field.")
-            ai_response_content = response_json["data"]["answer"]
-            if ai_response_content is None: ai_response_content = ""
-            if ai_response_content.strip():
-                logging.info(f"【同步请求】({log_attempt_str}) 成功获取非空内容。")
-                openai_response_obj = {
-                    "id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
-                    "model": openai_model_name_for_response,
-                    "choices": [{"index": 0, "message": {"role": "assistant", "content": ai_response_content}, "finish_reason": "stop"}],
-                    "usage": {}
-                }
-                return jsonify(openai_response_obj)
-            else:
-                logging.warning(f"【同步请求】({log_attempt_str}) 返回空回复。")
-                if empty_retry_attempt_num >= max_empty_response_retries:
-                    logging.error(f"【同步请求】达到最大空回复重试次数 ({max_empty_response_retries})。")
-                    return jsonify({
-                        "error": f"Model returned an empty response after {max_empty_response_retries} retries.",
                         "id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
                         "model": openai_model_name_for_response,
-                        "choices": [{"index": 0, "message": {"role": "assistant", "content": ""}, "finish_reason": "length"}],
-                        "usage": {}
-                    }), 500
-                logging.info(f"【同步请求】空回复，将在1秒后重试下一个Key。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
-                time.sleep(1)
-        except requests.exceptions.RequestException as e_req:
-            logging.warning(f"【同步请求】({log_attempt_str} using key {keymgr.display_key(current_apikey_for_attempt)}) 发生请求级错误: {e_req}")
-            if current_apikey_for_attempt: # Ensure key is marked bad
-                keymgr.mark_bad(current_apikey_for_attempt)
-            if empty_retry_attempt_num == 1:
-                raise e_req
             if empty_retry_attempt_num >= max_empty_response_retries:
-                logging.error(f"【同步请求】在最后一次空回复重试时发生请求错误。")
-                return jsonify({"error": f"Request failed on final empty response retry attempt. Last error: {str(e_req)}"}), 503
-            time.sleep(1)
-            continue
-        except (ValueError, KeyError, json.JSONDecodeError) as e_parse:
-            logging.error(f"【同步请求】({log_attempt_str}) 处理响应时出错: {e_parse}", exc_info=True)
-            return jsonify({"error": f"Error processing OnDemand sync response: {str(e_parse)}"}), 502
-    logging.error(f"【同步请求】意外退出空回复重试循环。")
-    return jsonify({"error": "Unexpected error in non-stream handling after empty response retries."}), 500
 @app.route("/v1/models", methods=["GET"])

 def format_openai_sse_delta(chunk_data_dict):
     return f"data: {json.dumps(chunk_data_dict, ensure_ascii=False)}\n\n"
 def _execute_one_stream_attempt(apikey_for_attempt, session_id_for_attempt, query_str, endpoint_id, openai_model_name_for_response, current_attempt_num_logging):
     url = f"{ONDEMAND_API_BASE}/sessions/{session_id_for_attempt}/query"
     payload = {
         "query": query_str,
     }
     accumulated_text_parts = []
+    api_error_yielded = False
+    max_500_retries_for_this_call = 5
+    current_500_retry_count = 0
+    while current_500_retry_count < max_500_retries_for_this_call:
+        current_500_retry_count += 1
+        if current_500_retry_count > 1:
+            logging.info(f"【流式请求子尝试 {current_attempt_num_logging} - 500错误重试 {current_500_retry_count-1}/{max_500_retries_for_this_call-1}】Key: {keymgr.display_key(apikey_for_attempt)}")
+        else:
+             logging.info(f"【流式请求子尝试 {current_attempt_num_logging}】发送到 OnDemand: Session={session_id_for_attempt}, Endpoint={endpoint_id}, Key={keymgr.display_key(apikey_for_attempt)}")
+        try:
+            with requests.post(url, json=payload, headers=headers, stream=True, timeout=180) as resp:
+                if resp.status_code == 500:
+                    logging.warning(f"【OnDemand流错误】(子尝试 {current_attempt_num_logging}, 500重试 {current_500_retry_count}) 收到500错误。Session: {session_id_for_attempt}")
+                    if current_500_retry_count >= max_500_retries_for_this_call:
+                        logging.error(f"【OnDemand流错误】(子尝试 {current_attempt_num_logging}) 达到500错误最大重试次数。将错误传递给上层。")
+                        api_error_yielded = True
+                        error_payload = {"error": {"message": f"OnDemand API persistent 500 error after {max_500_retries_for_this_call} retries (Attempt {current_attempt_num_logging}).",
+                                                   "type": "on_demand_persistent_500_error", "code": 500}}
+                        yield format_openai_sse_delta(error_payload)
                         yield "data: [DONE]\n\n"
                         return "".join(accumulated_text_parts).strip(), api_error_yielded
+                    time.sleep(1)
+                    continue
+                if resp.status_code != 200:
+                    api_error_yielded = True
+                    error_text = resp.text
+                    logging.error(f"【OnDemand流错误】请求失败 (子尝试 {current_attempt_num_logging})。状态码: {resp.status_code}, Session: {session_id_for_attempt}, 响应: {error_text[:500]}")
+                    error_payload = {
+                        "error": {
+                            "message": f"OnDemand API Error (Stream Init, Attempt {current_attempt_num_logging}): {resp.status_code} - {error_text[:200]}",
+                            "type": "on_demand_api_error",
+                            "code": resp.status_code
+                        }
+                    }
+                    yield format_openai_sse_delta(error_payload)
+                    yield "data: [DONE]\n\n"
+                    return "".join(accumulated_text_parts).strip(), api_error_yielded
+                first_chunk_sent = False
+                last_line_str = ""
+                for line_bytes in resp.iter_lines():
+                    if not line_bytes:
+                        continue
+                    line_str = line_bytes.decode("utf-8")
+                    last_line_str = line_str
+                    if line_str.startswith("data:"):
+                        data_part = line_str[len("data:"):].strip()
+                        if data_part == "[DONE]":
+                            logging.info(f"【OnDemand流】接收到 [DONE] 信号 (子尝试 {current_attempt_num_logging})。Session: {session_id_for_attempt}")
+                            yield "data: [DONE]\n\n"
+                            return "".join(accumulated_text_parts).strip(), api_error_yielded
+                        elif data_part.startswith("[ERROR]:"):
+                            api_error_yielded = True
+                            error_json_str = data_part[len("[ERROR]:"):].strip()
+                            logging.warning(f"【OnDemand流】接收到错误事件 (子尝试 {current_attempt_num_logging}): {error_json_str}。Session: {session_id_for_attempt}")
+                            try:
+                                error_obj = json.loads(error_json_str)
+                            except json.JSONDecodeError:
+                                error_obj = {"message": error_json_str, "type": "on_demand_stream_error_format"}
+                            yield format_openai_sse_delta({"error": error_obj})
+                            yield "data: [DONE]\n\n"
+                            return "".join(accumulated_text_parts).strip(), api_error_yielded
+                        else:
+                            try:
+                                event_data = json.loads(data_part)
+                                if event_data.get("eventType") == "fulfillment":
+                                    delta_content = event_data.get("answer", "")
+                                    if delta_content is None: delta_content = ""
+                                    accumulated_text_parts.append(delta_content)
+                                    choice_delta = {}
+                                    if not first_chunk_sent:
+                                        choice_delta["role"] = "assistant"
+                                        choice_delta["content"] = delta_content
+                                        first_chunk_sent = True
+                                    else:
+                                        choice_delta["content"] = delta_content
+                                    if not choice_delta.get("content") and not choice_delta.get("role"):
+                                        if not (choice_delta.get("role") and not choice_delta.get("content")):
+                                            continue
+                                    openai_chunk = {
+                                        "id": "chatcmpl-" + str(uuid.uuid4())[:12],
+                                        "object": "chat.completion.chunk",
+                                        "created": int(time.time()),
+                                        "model": openai_model_name_for_response,
+                                        "choices": [{"delta": choice_delta, "index": 0, "finish_reason": None}]
+                                    }
+                                    yield format_openai_sse_delta(openai_chunk)
+                            except json.JSONDecodeError:
+                                logging.warning(f"【OnDemand流】无法解析JSON (子尝试 {current_attempt_num_logging}): {data_part[:100]}... Session: {session_id_for_attempt}")
+                                continue
+                if not api_error_yielded and not last_line_str.startswith("data: [DONE]"):
                      logging.info(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 流迭代完成，补充发送 [DONE]。Session: {session_id_for_attempt}")
                      yield "data: [DONE]\n\n"
+                return "".join(accumulated_text_parts).strip(), api_error_yielded
+        except requests.exceptions.RequestException as e_req_inner:
+            logging.error(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 请求时发生异常: {e_req_inner}, Key: {keymgr.display_key(apikey_for_attempt)}")
+            if current_500_retry_count >= max_500_retries_for_this_call or (hasattr(e_req_inner, 'response') and e_req_inner.response is not None and e_req_inner.response.status_code != 500):
+                raise e_req_inner
+            time.sleep(1)
+        except Exception as e_inner_unknown:
+            logging.error(f"【OnDemand流】处理流时发生未知错误 (子尝试 {current_attempt_num_logging}): {e_inner_unknown}, Session: {session_id_for_attempt}", exc_info=True)
+            api_error_yielded = True
+            error_payload = {
+                "error": {"message": f"Unknown error during streaming (Attempt {current_attempt_num_logging}): {str(e_inner_unknown)}", "type": "unknown_streaming_error_in_attempt"}
+            }
+            yield format_openai_sse_delta(error_payload)
+            yield "data: [DONE]\n\n"
+            return "".join(accumulated_text_parts).strip(), api_error_yielded
+    logging.error(f"【OnDemand流】(子尝试 {current_attempt_num_logging}) 500错误重试循环意外结束。")
+    raise requests.exceptions.RequestException(f"Exhausted internal 500 retries for attempt {current_attempt_num_logging} without success or specific error propagation.")
 @app.route("/v1/chat/completions", methods=["POST"])
     def attempt_ondemand_request_wrapper(current_apikey_from_wrapper, current_session_id_from_wrapper):
         # Runs one attempt with the supplied key/session. A streaming request is
         # answered with an SSE Response driven by the handle_stream_request generator.
         if is_stream_request:
             return Response(
                 handle_stream_request(current_apikey_from_wrapper, current_session_id_from_wrapper, final_query_to_ondemand, target_endpoint_id, openai_model_name),
                 content_type='text/event-stream'
                 break
             except requests.exceptions.RequestException as http_err_outer:
                 last_exception_for_key_retry = http_err_outer
                 # Recover the HTTP status when the exception carries a response;
                 # it stays None for errors without one.
+                status_code_from_exc = None
+                if hasattr(http_err_outer, 'response') and http_err_outer.response is not None:
+                    status_code_from_exc = http_err_outer.response.status_code
+                logging.warning(f"【请求处理 - Key轮换尝试 {key_retry_count}】HTTP/请求错误。Status: {status_code_from_exc}, Key: {keymgr.display_key(selected_apikey_for_outer_retry) if selected_apikey_for_outer_retry else 'N/A'}, Error: {http_err_outer}")
                 # Key-marking policy: 524 and 500 leave the key untouched; any 4xx,
                 # 502 or 503 marks it bad; an error with no status at all also marks
                 # it bad. Other status codes fall through without marking the key.
+                if selected_apikey_for_outer_retry:
+                    if status_code_from_exc == 524:
+                        logging.info(f"【KeyManager】Key {keymgr.display_key(selected_apikey_for_outer_retry)} not marked bad due to 524 error.")
+                    elif status_code_from_exc == 500:
+                        logging.info(f"【KeyManager】Key {keymgr.display_key(selected_apikey_for_outer_retry)} not marked bad due to 500 error.")
+                    elif status_code_from_exc and ((400 <= status_code_from_exc < 500) or status_code_from_exc in [502, 503]): # Explicitly list codes that mark bad, excluding 500, 524
+                        keymgr.mark_bad(selected_apikey_for_outer_retry)
+                    elif not status_code_from_exc :
+                         keymgr.mark_bad(selected_apikey_for_outer_retry)
                 # Give up once the key-rotation budget has been used.
                 if key_retry_count >= max_key_retries:
                     logging.error(f"【请求处理】所有Key轮换尝试均失败。最后错误: {last_exception_for_key_retry}")
                     break
             except Exception as e_outer:
                 # Any non-request exception: remember it, mark the selected key bad
                 # (if there is one) and stop.
                 last_exception_for_key_retry = e_outer
                 logging.error(f"【请求处理 - Key轮换尝试 {key_retry_count}】发生意外严重错误: {e_outer}", exc_info=True)
+                if selected_apikey_for_outer_retry:
                     keymgr.mark_bad(selected_apikey_for_outer_retry)
                 break
         # Final failure response: the same fixed message and code are sent either as
         # an SSE error event followed by [DONE] (streaming) or as a JSON body, both
         # with HTTP status 500.
+        error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
+        error_code_str = "max_retries_check_context_contact_admin"
+        logging.error(f"【请求处理】所有Key/Session获取尝试失败。最终错误: {error_message} Last underlying exception: {last_exception_for_key_retry}")
         if is_stream_request:
             def error_stream_gen():
+                yield format_openai_sse_delta({"error": {"message": error_message, "type": "proxy_max_retries_exceeded", "code": error_code_str}})
                 yield "data: [DONE]\n\n"
+            return Response(error_stream_gen(), content_type='text/event-stream', status=500)
         else:
+            return jsonify({"error": error_message, "code": error_code_str}), 500
     return with_valid_key_and_session(attempt_ondemand_request_wrapper)
 def handle_stream_request(initial_apikey, initial_session_id, query_str, endpoint_id, openai_model_name_for_response):
     # Generator that yields SSE strings for one client request. It runs up to
     # max_empty_response_retries stream attempts and moves to the next attempt
     # only when the previous one produced no text (or, after the first attempt,
     # failed with a request-level error). Every terminal error path yields an
     # SSE error event followed by "data: [DONE]".
     max_empty_response_retries = 5
     empty_retry_attempt_num = 0
     while empty_retry_attempt_num < max_empty_response_retries:
         empty_retry_attempt_num += 1
         accumulated_text_this_attempt = ""
+        api_error_in_attempt = False
         # Attempts after the first announce acquisition of a new Key/Session.
         if empty_retry_attempt_num > 1:
             logging.info(f"【流式请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session...")
                 logging.info(f"【流式请求-空回复重试 {empty_retry_attempt_num-1}】新Key/Session获取成功: Key={keymgr.display_key(current_apikey_for_attempt)}, Session={current_session_id_for_attempt}")
             except (ValueError, requests.exceptions.RequestException) as e_key_session:
                 logging.warning(f"【流式请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session失败: {e_key_session}")
                 # Recover the HTTP status when the exception carries a response.
+                status_code_from_exc_retry_setup = None
+                if hasattr(e_key_session, 'response') and e_key_session.response is not None:
+                    status_code_from_exc_retry_setup = e_key_session.response.status_code
                 # A ValueError never marks the key bad; for request errors, 524 and
                 # 500 leave the key alone and every other case marks it bad.
                 if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
+                    if status_code_from_exc_retry_setup == 524:
+                        logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 524 error during key/session acquisition for stream retry.")
+                    elif status_code_from_exc_retry_setup == 500:
+                        logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 500 error during key/session acquisition for stream retry.")
+                    else:
+                        keymgr.mark_bad(current_apikey_for_attempt)
                 # Last attempt and still no usable key/session: report and finish.
                 if empty_retry_attempt_num >= max_empty_response_retries:
+                    final_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
+                    final_error_code = "max_retries_check_context_contact_admin"
+                    logging.error(f"【流式请求】无法为最终空回复重试获取新Key/Session。错误: {e_key_session}")
+                    yield format_openai_sse_delta({"error": {"message": final_error_message, "type": "proxy_final_retry_setup_failed", "code": final_error_code, "details": str(e_key_session)}})
                     yield "data: [DONE]\n\n"
                     return
                 time.sleep(1)
         log_attempt_str = f"初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"
         try:
             # "yield from" forwards the sub-attempt's SSE events to the client and
             # captures its return value, which is unpacked below as
             # (accumulated text, flag saying an API error was already streamed).
             result_tuple = yield from _execute_one_stream_attempt(
                 current_apikey_for_attempt,
                 current_session_id_for_attempt,
                 f"{log_attempt_str} (Overall attempt {empty_retry_attempt_num})"
             )
             accumulated_text_this_attempt = result_tuple[0]
+            api_error_in_attempt = result_tuple[1]
+        except requests.exceptions.RequestException as e_req:
+            log_key_display = keymgr.display_key(current_apikey_for_attempt) if current_apikey_for_attempt else "N/A"
+            status_code_from_exc_stream = None
+            if hasattr(e_req, 'response') and e_req.response is not None:
+                status_code_from_exc_stream = e_req.response.status_code
+            logging.warning(f"【流式请求】({log_attempt_str} using key {log_key_display}) 发生请求级错误: {e_req}, Status: {status_code_from_exc_stream}")
             # Same key policy as above: only 524 and 500 spare the key.
+            if current_apikey_for_attempt:
+                if status_code_from_exc_stream == 524:
+                    logging.info(f"【KeyManager】Key {log_key_display} not marked bad for 524 error during stream attempt.")
+                elif status_code_from_exc_stream == 500:
+                    logging.info(f"【KeyManager】Key {log_key_display} not marked bad for 500 error during stream attempt.")
+                else:
+                    keymgr.mark_bad(current_apikey_for_attempt)
             # A failure on the very first attempt is re-raised out of the generator.
             # NOTE(review): this generator is handed to Response(...), so the raise
             # presumably surfaces while the response is being iterated rather than
             # inside the outer key-rotation try/except - confirm this is intended.
             if empty_retry_attempt_num == 1:
                 raise e_req
             if empty_retry_attempt_num >= max_empty_response_retries:
+                final_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
+                final_error_code = "max_retries_check_context_contact_admin"
+                logging.error(f"【流式请求】在最后一次空回复重试时发生请求错误: {e_req}")
+                yield format_openai_sse_delta({"error": {"message": final_error_message, "type": "proxy_final_retry_request_failed", "code": final_error_code, "details": str(e_req)}})
                 yield "data: [DONE]\n\n"
                 return
             time.sleep(1)
+            continue
         # The sub-attempt already streamed an API error to the client: stop here
         # without retrying.
         if api_error_in_attempt:
+            logging.warning(f"【流式请求】({log_attempt_str}) 子尝试已处理并流式传输API错误。")
             return
         # Non-empty text was streamed: the request is complete.
         if accumulated_text_this_attempt:
             logging.info(f"【流式请求】({log_attempt_str}) 成功获取非空内容。")
             return
         # Empty reply: either give up with an error event or wait 1s and retry.
         logging.warning(f"【流式请求】({log_attempt_str}) 返回空内容。")
         if empty_retry_attempt_num >= max_empty_response_retries:
+            final_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
+            final_error_code = "max_retries_check_context_contact_admin"
+            logging.error(f"【流式请求】达到最大空回复重试次数 ({max_empty_response_retries})。将返回指定错误。")
             yield format_openai_sse_delta({
+                "error": {"message": final_error_message, "type": "max_empty_retries_exceeded", "code": final_error_code}
             })
             yield "data: [DONE]\n\n"
             return
         logging.info(f"【流式请求】空回复，将在1秒后重试下一个Key。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
         time.sleep(1)
     # Defensive fallback for leaving the loop without returning; it uses a distinct
     # "_fallback" error code so this path can be told apart from the others.
+    final_fallback_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
+    final_fallback_error_code = "max_retries_check_context_contact_admin_fallback"
+    logging.error(f"【流式请求】意外退出空回复重试循环。返回最终错误。")
+    yield format_openai_sse_delta({"error": {"message": final_fallback_error_message, "type": "internal_proxy_error_unexpected_exit", "code": final_fallback_error_code}})
     yield "data: [DONE]\n\n"
 def handle_non_stream_request(initial_apikey, initial_session_id, query_str, endpoint_id, openai_model_name_for_response):
     # Synchronous ("responseMode": "sync") counterpart of the streaming handler.
     # Two nested retry budgets are used:
     #   * outer loop - up to max_empty_response_retries attempts, advancing when
     #     an attempt yields an empty answer;
     #   * inner loop - up to max_500_retries_for_this_call POSTs on the same
     #     key/session while the upstream answers HTTP 500.
     # Returns a jsonify(...) chat.completion on success, or a (jsonify(...), 500)
     # tuple on exhaustion; request-level failures are raised as
     # requests.exceptions.RequestException.
+    max_empty_response_retries = 5
     empty_retry_attempt_num = 0
     current_apikey_for_attempt = initial_apikey
     current_session_id_for_attempt = initial_session_id
+    ai_response_content = "" # Define ai_response_content outside the try block to ensure it's available for the final empty check
     while empty_retry_attempt_num < max_empty_response_retries:
         empty_retry_attempt_num += 1
             # Acquire a fresh key and open a new session for this attempt.
             try:
                 current_apikey_for_attempt = keymgr.get()
                 current_session_id_for_attempt = create_session(current_apikey_for_attempt)
                 logging.info(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】新Key/Session获取成功: Key={keymgr.display_key(current_apikey_for_attempt)}, Session={current_session_id_for_attempt}")
             except (ValueError, requests.exceptions.RequestException) as e_key_session:
                 logging.warning(f"【同步请求-空回复重试 {empty_retry_attempt_num-1}】获取新Key/Session失败: {e_key_session}")
                 # Recover the HTTP status when the exception carries a response.
+                status_code_from_exc_retry_setup_ns = None
+                if hasattr(e_key_session, 'response') and e_key_session.response is not None:
+                    status_code_from_exc_retry_setup_ns = e_key_session.response.status_code
                 # A ValueError never marks the key bad; for request errors, 524 and
                 # 500 leave the key alone and every other case marks it bad.
                 if current_apikey_for_attempt and not isinstance(e_key_session, ValueError):
+                    if status_code_from_exc_retry_setup_ns == 524:
+                        logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 524 error during key/session acquisition for non-stream retry.")
+                    elif status_code_from_exc_retry_setup_ns == 500:
+                        logging.info(f"【KeyManager】Key {keymgr.display_key(current_apikey_for_attempt)} not marked bad for 500 error during key/session acquisition for non-stream retry.")
+                    else:
+                        keymgr.mark_bad(current_apikey_for_attempt)
                 # Out of attempts: answer with the fixed error payload and HTTP 500.
                 if empty_retry_attempt_num >= max_empty_response_retries:
+                    final_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
+                    final_error_code = "max_retries_check_context_contact_admin"
+                    logging.error(f"【同步请求】无法为最终空回复重试获取新Key/Session。错误: {e_key_session}")
+                    return jsonify({"error": final_error_message, "code": final_error_code, "details": str(e_key_session)}), 500
                 # Otherwise pause, forget the failed key and start the next attempt.
                 time.sleep(1)
                 current_apikey_for_attempt = None
                 continue
         log_attempt_str = f"初始尝试" if empty_retry_attempt_num == 1 else f"空回复重试 {empty_retry_attempt_num-1}"
+        max_500_retries_for_this_call = 5
+        current_500_retry_count = 0
+        # Reset ai_response_content for each new attempt (especially for the 500-retry loop)
+        ai_response_content = ""
+        while current_500_retry_count < max_500_retries_for_this_call:
+            current_500_retry_count += 1
+            if current_500_retry_count > 1:
+                logging.info(f"【同步请求】({log_attempt_str}, 总尝试 {empty_retry_attempt_num}, 500错误重试 {current_500_retry_count-1}/{max_500_retries_for_this_call-1}) Session={current_session_id_for_attempt}, Key={keymgr.display_key(current_apikey_for_attempt)}")
+            else:
+                logging.info(f"【同步请求】({log_attempt_str}, 总尝试 {empty_retry_attempt_num}) Session={current_session_id_for_attempt}, Key={keymgr.display_key(current_apikey_for_attempt)}")
+            url = f"{ONDEMAND_API_BASE}/sessions/{current_session_id_for_attempt}/query"
+            # Corrected: Use query_str and endpoint_id parameters passed to the function
+            payload = { "query": query_str, "endpointId": endpoint_id, "pluginIds": [], "responseMode": "sync" }
+            headers = {"apikey": current_apikey_for_attempt, "Content-Type": "application/json"}
+            try:
+                resp = requests.post(url, json=payload, headers=headers, timeout=120)
                 # HTTP 500: retry on the same key/session after 1s. On the last
                 # allowed retry, raise_for_status() raises instead, and the
                 # RequestException handler below re-raises it to the caller.
+                if resp.status_code == 500:
+                    logging.warning(f"【OnDemand同步错误】({log_attempt_str}, 500重试 {current_500_retry_count}) 收到500错误。")
+                    if current_500_retry_count >= max_500_retries_for_this_call:
+                        logging.error(f"【OnDemand同步错误】({log_attempt_str}) 达到500错误最大重试次数。将错误传递给上层。")
+                        resp.raise_for_status()
+                    time.sleep(1)
+                    continue
                 # Any other non-success status raises here.
+                resp.raise_for_status()
+                response_json = resp.json()
                 # The answer is read from response_json["data"]["answer"]; a body
                 # without that path is treated as a format error.
+                if "data" not in response_json or "answer" not in response_json["data"]:
+                    logging.error(f"【OnDemand同步错误】响应格式不符合预期 ({log_attempt_str})。Session: {current_session_id_for_attempt}, 响应: {str(response_json)[:500]}")
+                    raise ValueError(f"OnDemand API sync response missing 'data.answer' field on attempt {empty_retry_attempt_num}, 500-retry {current_500_retry_count}.")
+                ai_response_content = response_json["data"]["answer"]
                 # A null answer is normalised to "" so .strip() below is safe.
+                if ai_response_content is None: ai_response_content = ""
+                if ai_response_content.strip():
+                    logging.info(f"【同步请求】({log_attempt_str}, 500重试 {current_500_retry_count}) 成功获取非空内容。")
+                    openai_response_obj = {
                         "id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
                         "model": openai_model_name_for_response,
+                        "choices": [{"index": 0, "message": {"role": "assistant", "content": ai_response_content}, "finish_reason": "stop"}],
+                        "usage": {}
+                    }
+                    return jsonify(openai_response_obj) # SUCCESS
+                else:
                     # Empty answer on a successful response: leave the 500-retry
                     # loop so the outer loop can handle the empty reply.
+                    logging.warning(f"【同步请求】({log_attempt_str}, 500重试 {current_500_retry_count}) 返回空回复。")
+                    break
+            except requests.exceptions.RequestException as e_req:
+                log_key_display_sync = keymgr.display_key(current_apikey_for_attempt) if current_apikey_for_attempt else "N/A"
+                status_code_from_exc_sync = None
+                if hasattr(e_req, 'response') and e_req.response is not None:
+                    status_code_from_exc_sync = e_req.response.status_code
+                logging.warning(f"【同步请求】({log_attempt_str}, 500重试 {current_500_retry_count} using key {log_key_display_sync}) 发生请求级错误: {e_req}, Status: {status_code_from_exc_sync}")
                 # Re-raise unless this is a 500 with retries still available.
                 # NOTE(review): both branches of the inner if/else raise e_req, so
                 # the empty_retry_attempt_num test has no effect.
+                if current_500_retry_count >= max_500_retries_for_this_call or status_code_from_exc_sync != 500:
+                    if empty_retry_attempt_num == 1:
+                        raise e_req
+                    else:
+                        raise e_req
                 # NOTE(review): this sleep looks unreachable - a 500 response with
                 # retries left is handled by the "continue" above without raising.
+                time.sleep(1)
+                # Continue to the next iteration of current_500_retry_count loop
+            except (ValueError, KeyError, json.JSONDecodeError) as e_parse:
                 # Parsing/format problems are converted into RequestException; the
                 # two raises differ only in the message text.
+                logging.error(f"【同步请求】({log_attempt_str}, 500重试 {current_500_retry_count}) 处理响应或格式时出错: {e_parse}", exc_info=True)
+                if empty_retry_attempt_num == 1 and current_500_retry_count == 1 :
+                     raise requests.exceptions.RequestException(f"Response format error on first attempt: {e_parse}") from e_parse
+                raise requests.exceptions.RequestException(f"Response format error during retry: {e_parse}") from e_parse
+        # After the 500-retry loop for the current key/session
+        if ai_response_content.strip(): # Should have been returned if non-empty
+            pass # Should not reach here if content was found
+        else: # Content is still empty for this key/session after 500-retries (or if 200 OK but empty)
             # Empty-reply budget exhausted: answer HTTP 500 with a body that has
             # both an "error" field and a chat.completion-shaped payload whose
             # content is empty.
             if empty_retry_attempt_num >= max_empty_response_retries:
+                final_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
+                final_error_code = "max_retries_check_context_contact_admin"
+                logging.error(f"【同步请求】达到最大空回复重试次数 ({max_empty_response_retries})。将返回指定错误。")
+                return jsonify({
+                    "error": final_error_message,
+                    "id": "chatcmpl-" + str(uuid.uuid4())[:12], "object": "chat.completion", "created": int(time.time()),
+                    "model": openai_model_name_for_response,
+                    "choices": [{"index": 0, "message": {"role": "assistant", "content": ""}, "finish_reason": "length"}],
+                    "usage": {}, "code": final_error_code
+                }), 500
+            logging.info(f"【同步请求】空回复（在500-重试循环之后），准备进行下一个空回复尝试。当前总尝试 {empty_retry_attempt_num}/{max_empty_response_retries}")
+            time.sleep(1)
+            # Outer loop (empty_retry_attempt_num) will continue to try a new key/session
     # Defensive fallback for leaving the outer loop without returning; it uses a
     # distinct "_fallback" error code.
+    final_fallback_error_message = "重试次数过多，请检查上下文长度! 或联系管理员!"
+    final_fallback_error_code = "max_retries_check_context_contact_admin_fallback"
+    logging.error(f"【同步请求】意外退出空回复重试循环。返回最终错误。")
+    return jsonify({"error": final_fallback_error_message, "code": final_fallback_error_code}), 500
 @app.route("/v1/models", methods=["GET"])