Spaces: Running on CPU Upgrade
| import json, logging | |
| from typing import List, Generator, Optional | |
| from openai import OpenAI | |
def request_generation(
    api_key: str,
    api_base: str,
    message: str,
    system_prompt: str,
    model_name: str,
    chat_history: Optional[List[dict]] = None,
    temperature: float = 0.3,
    max_new_tokens: int = 1024,
    reasoning_effort: str = "off",
    tools: Optional[List[dict]] = None,
    tool_choice: Optional[str] = None,
) -> Generator[str, None, None]:
    """
    Stream Responses API events as text chunks.

    Emits, in order:
      - the literal sentinel "analysis" once (only when reasoning deltas
        arrive), followed by raw reasoning deltas;
      - the literal sentinel "assistantfinal" once, followed by visible
        output deltas (buffered; flushed on newline or past ~150 chars);
      - if the stream completes with no visible output, a fallback message
        describing the last tool call the model attempted.

    Args:
        api_key: API key for the OpenAI-compatible endpoint.
        api_base: Base URL of the endpoint.
        message: Latest user message.
        system_prompt: System instructions, sent via ``instructions``.
        model_name: Model identifier.
        chat_history: Prior turns; "system" entries are dropped because the
            system prompt is supplied separately through ``instructions``.
        temperature: Sampling temperature.
        max_new_tokens: Cap on generated tokens (``max_output_tokens``).
        reasoning_effort: Effort level forwarded to the ``reasoning`` block.
        tools: Optional tool definitions.
        tool_choice: Optional tool-choice directive.

    Yields:
        Sentinel strings and text chunks as described above; on failure a
        single "Error: ..." string.
    """
    client = OpenAI(api_key=api_key, base_url=api_base)

    input_messages: List[dict] = []
    if chat_history:
        # Drop system turns: the system prompt travels in `instructions`,
        # and duplicating it inside `input` would send it twice.
        input_messages.extend(m for m in chat_history if m.get("role") != "system")
    input_messages.append({"role": "user", "content": message})

    request_args = {
        "model": model_name,
        "input": input_messages,
        "instructions": system_prompt,
        "temperature": temperature,
        "max_output_tokens": max_new_tokens,
        # NOTE(review): both "generate_summary" and "summary" are sent —
        # presumably for compatibility across API revisions; confirm which
        # one the target endpoint actually honours.
        "reasoning": {
            "effort": reasoning_effort,
            "generate_summary": "detailed",
            "summary": "detailed",
        },
        "stream": True,
    }
    if tools:
        request_args["tools"] = tools
    if tool_choice:
        request_args["tool_choice"] = tool_choice

    try:
        stream = client.responses.create(**request_args)

        reasoning_started = False
        final_sentinel_sent = False
        saw_visible_output = False
        last_tool_name = None
        last_tool_args = None
        buffer = ""

        for event in stream:
            et = getattr(event, "type", "")

            if et == "response.reasoning_text.delta":
                if not reasoning_started:
                    yield "analysis"
                    reasoning_started = True
                rdelta = getattr(event, "delta", "") or ""
                if rdelta:
                    yield rdelta
                continue

            if et == "response.output_text.delta":
                # BUG FIX: the sentinel was previously emitted only when a
                # reasoning phase had started, so responses without
                # reasoning never carried the "assistantfinal" marker the
                # docstring promises. Emit it before the first visible delta
                # unconditionally.
                if not final_sentinel_sent:
                    yield "assistantfinal"
                    final_sentinel_sent = True
                saw_visible_output = True
                delta = getattr(event, "delta", "") or ""
                buffer += delta
                # Flush on newline or once ~150 chars accumulate: keeps the
                # consumer responsive without yielding per-character.
                if "\n" in buffer or len(buffer) > 150:
                    yield buffer
                    buffer = ""
                continue

            if et.startswith("response.tool") or et.startswith("response.function_call"):
                # Remember the most recent tool call so it can be reported
                # if the model never produces visible text. Argument payloads
                # appear under different attribute names across event shapes.
                name = getattr(event, "name", None)
                args = getattr(event, "arguments", None)
                if args is None:
                    args = getattr(event, "args", None) or getattr(event, "delta", None) or getattr(event, "data", None)
                if name:
                    last_tool_name = name
                if args is not None:
                    last_tool_args = args
                continue

            if et in ("response.completed", "response.error"):
                if buffer:
                    yield buffer
                    buffer = ""
                # Same fix as above: guarantee the sentinel precedes the
                # fallback/error text even when no reasoning occurred.
                if not final_sentinel_sent:
                    yield "assistantfinal"
                    final_sentinel_sent = True
                if not saw_visible_output:
                    msg = "I attempted to call a tool, but tools aren't executed in this environment, so no final answer was produced."
                    if last_tool_name:
                        try:
                            args_text = json.dumps(last_tool_args, ensure_ascii=False, default=str)
                        except Exception:
                            args_text = str(last_tool_args)
                        msg += f"\n\n• Tool requested: **{last_tool_name}**\n• Arguments: `{args_text}`"
                    yield msg
                if et == "response.error":
                    err = getattr(event, "error", None)
                    emsg = getattr(err, "message", "") if err else "Unknown error"
                    yield f"Error: {emsg}"
                break

        # Flush any tail left if the stream ended without a terminal event.
        if buffer:
            yield buffer
    except Exception as e:
        logging.exception("[Gateway] Streaming failed")
        yield f"Error: {e}"