icebear0828 Claude Opus 4.6 committed on
Commit
7366e72
·
1 Parent(s): fda1a14

feat: add reasoning/thinking output support for OpenAI and Anthropic routes

Browse files

- Always send `summary: "auto"` to Codex API so reasoning summaries are available
- Parse `response.reasoning_summary_text.delta/done` SSE events
- OpenAI route: emit `reasoning_content` in chunks when client sends `reasoning_effort`
- Anthropic route: emit thinking blocks when client sends `thinking.type: enabled/adaptive`
- Extract block lifecycle helpers in Anthropic translator to eliminate duplication
- Log unknown SSE events via console.debug for discovering new event types

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

CHANGELOG.md CHANGED
@@ -8,6 +8,7 @@
8
 
9
  ### Added
10
 
 
11
  - 图片输入支持:OpenAI、Anthropic、Gemini 三种格式的图片内容现在可以正确透传到 Codex 后端(`input_image` + data URI),此前图片被静默丢弃
12
  - 每窗口使用量计数器:Dashboard 主显示当前窗口内的请求数和 Token 用量,累计总量降为次要灰色小字;窗口过期时自动归零(时间驱动,零 API 开销),后端同步作为双保险校正
13
  - 窗口时长显示:从后端同步 `limit_window_seconds`,AccountCard header 显示窗口时长 badge(如 `3h`),重置时间行追加窗口时长文字
 
8
 
9
  ### Added
10
 
11
+ - Reasoning/Thinking 输出支持:始终向 Codex API 发送 `summary: "auto"` 以获取推理摘要事件;OpenAI 路由在客户端发送 `reasoning_effort` 时以 `reasoning_content` 输出;Anthropic 路由在客户端发送 `thinking.type: enabled/adaptive` 时以 thinking block 输出;未知 SSE 事件记录到 debug 日志以便发现新事件类型
12
  - 图片输入支持:OpenAI、Anthropic、Gemini 三种格式的图片内容现在可以正确透传到 Codex 后端(`input_image` + data URI),此前图片被静默丢弃
13
  - 每窗口使用量计数器:Dashboard 主显示当前窗口内的请求数和 Token 用量,累计总量降为次要灰色小字;窗口过期时自动归零(时间驱动,零 API 开销),后端同步作为双保险校正
14
  - 窗口时长显示:从后端同步 `limit_window_seconds`,AccountCard header 显示窗口时长 badge(如 `3h`),重置时间行追加窗口时长文字
src/proxy/codex-api.ts CHANGED
@@ -26,8 +26,8 @@ export interface CodexResponsesRequest {
26
  input: CodexInputItem[];
27
  stream: true;
28
  store: false;
29
- /** Optional: reasoning effort level */
30
- reasoning?: { effort: string };
31
  /** Optional: tools available to the model */
32
  tools?: unknown[];
33
  /** Optional: tool choice strategy */
 
26
  input: CodexInputItem[];
27
  stream: true;
28
  store: false;
29
+ /** Optional: reasoning effort + summary mode */
30
+ reasoning?: { effort?: string; summary?: string };
31
  /** Optional: tools available to the model */
32
  tools?: unknown[];
33
  /** Optional: tool choice strategy */
src/routes/chat.ts CHANGED
@@ -14,37 +14,41 @@ import {
14
  type FormatAdapter,
15
  } from "./shared/proxy-handler.js";
16
 
17
- const OPENAI_FORMAT: FormatAdapter = {
18
- tag: "Chat",
19
- noAccountStatus: 503,
20
- formatNoAccount: () => ({
21
- error: {
22
- message:
23
- "No available accounts. All accounts are expired or rate-limited.",
24
- type: "server_error",
25
- param: null,
26
- code: "no_available_accounts",
27
- },
28
- }),
29
- format429: (msg) => ({
30
- error: {
31
- message: msg,
32
- type: "rate_limit_error",
33
- param: null,
34
- code: "rate_limit_exceeded",
35
- },
36
- }),
37
- formatError: (_status, msg) => ({
38
- error: {
39
- message: msg,
40
- type: "server_error",
41
- param: null,
42
- code: "codex_api_error",
43
- },
44
- }),
45
- streamTranslator: streamCodexToOpenAI,
46
- collectTranslator: collectCodexResponse,
47
- };
 
 
 
 
48
 
49
  export function createChatRoutes(
50
  accountPool: AccountPool,
@@ -118,6 +122,7 @@ export function createChatRoutes(
118
  const req = parsed.data;
119
 
120
  const codexRequest = translateToCodexRequest(req);
 
121
 
122
  return handleProxyRequest(
123
  c,
@@ -137,7 +142,7 @@ export function createChatRoutes(
137
  model: codexRequest.model,
138
  isStreaming: req.stream,
139
  },
140
- OPENAI_FORMAT,
141
  );
142
  });
143
 
 
14
  type FormatAdapter,
15
  } from "./shared/proxy-handler.js";
16
 
17
+ function makeOpenAIFormat(wantReasoning: boolean): FormatAdapter {
18
+ return {
19
+ tag: "Chat",
20
+ noAccountStatus: 503,
21
+ formatNoAccount: () => ({
22
+ error: {
23
+ message:
24
+ "No available accounts. All accounts are expired or rate-limited.",
25
+ type: "server_error",
26
+ param: null,
27
+ code: "no_available_accounts",
28
+ },
29
+ }),
30
+ format429: (msg) => ({
31
+ error: {
32
+ message: msg,
33
+ type: "rate_limit_error",
34
+ param: null,
35
+ code: "rate_limit_exceeded",
36
+ },
37
+ }),
38
+ formatError: (_status, msg) => ({
39
+ error: {
40
+ message: msg,
41
+ type: "server_error",
42
+ param: null,
43
+ code: "codex_api_error",
44
+ },
45
+ }),
46
+ streamTranslator: (api, response, model, onUsage, onResponseId) =>
47
+ streamCodexToOpenAI(api, response, model, onUsage, onResponseId, wantReasoning),
48
+ collectTranslator: (api, response, model) =>
49
+ collectCodexResponse(api, response, model, wantReasoning),
50
+ };
51
+ }
52
 
53
  export function createChatRoutes(
54
  accountPool: AccountPool,
 
122
  const req = parsed.data;
123
 
124
  const codexRequest = translateToCodexRequest(req);
125
+ const wantReasoning = !!req.reasoning_effort;
126
 
127
  return handleProxyRequest(
128
  c,
 
142
  model: codexRequest.model,
143
  isStreaming: req.stream,
144
  },
145
+ makeOpenAIFormat(wantReasoning),
146
  );
147
  });
148
 
src/routes/messages.ts CHANGED
@@ -41,19 +41,23 @@ function contentToString(
41
  .join("\n");
42
  }
43
 
44
- const ANTHROPIC_FORMAT: FormatAdapter = {
45
- tag: "Messages",
46
- noAccountStatus: 529 as StatusCode,
47
- formatNoAccount: () =>
48
- makeError(
49
- "overloaded_error",
50
- "No available accounts. All accounts are expired or rate-limited.",
51
- ),
52
- format429: (msg) => makeError("rate_limit_error", msg),
53
- formatError: (_status, msg) => makeError("api_error", msg),
54
- streamTranslator: streamCodexToAnthropic,
55
- collectTranslator: collectCodexToAnthropicResponse,
56
- };
 
 
 
 
57
 
58
  export function createMessagesRoutes(
59
  accountPool: AccountPool,
@@ -121,6 +125,7 @@ export function createMessagesRoutes(
121
  }
122
 
123
  const codexRequest = translateAnthropicToCodexRequest(req);
 
124
 
125
  return handleProxyRequest(
126
  c,
@@ -133,7 +138,7 @@ export function createMessagesRoutes(
133
  model: req.model,
134
  isStreaming: req.stream,
135
  },
136
- ANTHROPIC_FORMAT,
137
  );
138
  });
139
 
 
41
  .join("\n");
42
  }
43
 
44
+ function makeAnthropicFormat(wantThinking: boolean): FormatAdapter {
45
+ return {
46
+ tag: "Messages",
47
+ noAccountStatus: 529 as StatusCode,
48
+ formatNoAccount: () =>
49
+ makeError(
50
+ "overloaded_error",
51
+ "No available accounts. All accounts are expired or rate-limited.",
52
+ ),
53
+ format429: (msg) => makeError("rate_limit_error", msg),
54
+ formatError: (_status, msg) => makeError("api_error", msg),
55
+ streamTranslator: (api, response, model, onUsage, onResponseId) =>
56
+ streamCodexToAnthropic(api, response, model, onUsage, onResponseId, wantThinking),
57
+ collectTranslator: (api, response, model) =>
58
+ collectCodexToAnthropicResponse(api, response, model, wantThinking),
59
+ };
60
+ }
61
 
62
  export function createMessagesRoutes(
63
  accountPool: AccountPool,
 
125
  }
126
 
127
  const codexRequest = translateAnthropicToCodexRequest(req);
128
+ const wantThinking = req.thinking?.type === "enabled" || req.thinking?.type === "adaptive";
129
 
130
  return handleProxyRequest(
131
  c,
 
138
  model: req.model,
139
  isStreaming: req.stream,
140
  },
141
+ makeAnthropicFormat(wantThinking),
142
  );
143
  });
144
 
src/translation/anthropic-to-codex.ts CHANGED
@@ -210,15 +210,13 @@ export function translateAnthropicToCodexRequest(
210
  request.previous_response_id = previousResponseId;
211
  }
212
 
213
- // Add reasoning effort: thinking param → model default → config default
214
  const thinkingEffort = mapThinkingToEffort(req.thinking);
215
  const effort =
216
  thinkingEffort ??
217
  modelInfo?.defaultReasoningEffort ??
218
  config.model.default_reasoning_effort;
219
- if (effort) {
220
- request.reasoning = { effort };
221
- }
222
 
223
  return request;
224
  }
 
210
  request.previous_response_id = previousResponseId;
211
  }
212
 
213
+ // Always request reasoning summary (translation layer filters output on demand)
214
  const thinkingEffort = mapThinkingToEffort(req.thinking);
215
  const effort =
216
  thinkingEffort ??
217
  modelInfo?.defaultReasoningEffort ??
218
  config.model.default_reasoning_effort;
219
+ request.reasoning = { summary: "auto", ...(effort ? { effort } : {}) };
 
 
220
 
221
  return request;
222
  }
src/translation/codex-event-extractor.ts CHANGED
@@ -47,6 +47,7 @@ export interface ExtractedEvent {
47
  typed: TypedCodexEvent;
48
  responseId?: string;
49
  textDelta?: string;
 
50
  usage?: UsageInfo;
51
  error?: { code: string; message: string };
52
  functionCallStart?: FunctionCallStart;
@@ -69,6 +70,11 @@ export async function* iterateCodexEvents(
69
  const typed = parseCodexEvent(raw);
70
  const extracted: ExtractedEvent = { typed };
71
 
 
 
 
 
 
72
  switch (typed.type) {
73
  case "response.created":
74
  case "response.in_progress":
@@ -79,6 +85,10 @@ export async function* iterateCodexEvents(
79
  extracted.textDelta = typed.delta;
80
  break;
81
 
 
 
 
 
82
  case "response.output_item.added":
83
  if (typed.item.type === "function_call") {
84
  // Register item_id → call_id mapping
 
47
  typed: TypedCodexEvent;
48
  responseId?: string;
49
  textDelta?: string;
50
+ reasoningDelta?: string;
51
  usage?: UsageInfo;
52
  error?: { code: string; message: string };
53
  functionCallStart?: FunctionCallStart;
 
70
  const typed = parseCodexEvent(raw);
71
  const extracted: ExtractedEvent = { typed };
72
 
73
+ // Log unrecognized events to discover new Codex event types
74
+ if (typed.type === "unknown") {
75
+ console.debug(`[CodexEvents] Unknown event: ${raw.event}`, JSON.stringify(raw.data).slice(0, 300));
76
+ }
77
+
78
  switch (typed.type) {
79
  case "response.created":
80
  case "response.in_progress":
 
85
  extracted.textDelta = typed.delta;
86
  break;
87
 
88
+ case "response.reasoning_summary_text.delta":
89
+ extracted.reasoningDelta = typed.delta;
90
+ break;
91
+
92
  case "response.output_item.added":
93
  if (typed.item.type === "function_call") {
94
  // Register item_id → call_id mapping
src/translation/codex-to-anthropic.ts CHANGED
@@ -3,6 +3,7 @@
3
  *
4
  * Codex SSE events:
5
  * response.created → extract response ID
 
6
  * response.output_text.delta → content_block_delta (text_delta)
7
  * response.completed → content_block_stop + message_delta + message_stop
8
  *
@@ -31,6 +32,9 @@ function formatSSE(eventType: string, data: unknown): string {
31
  /**
32
  * Stream Codex Responses API events as Anthropic Messages SSE.
33
  * Yields string chunks ready to write to the HTTP response.
 
 
 
34
  */
35
  export async function* streamCodexToAnthropic(
36
  codexApi: CodexApi,
@@ -38,6 +42,7 @@ export async function* streamCodexToAnthropic(
38
  model: string,
39
  onUsage?: (usage: AnthropicUsageInfo) => void,
40
  onResponseId?: (id: string) => void,
 
41
  ): AsyncGenerator<string> {
42
  const msgId = `msg_${randomUUID().replace(/-/g, "").slice(0, 24)}`;
43
  let outputTokens = 0;
@@ -46,8 +51,42 @@ export async function* streamCodexToAnthropic(
46
  let hasContent = false;
47
  let contentIndex = 0;
48
  let textBlockStarted = false;
 
49
  const callIdsWithDeltas = new Set<string>();
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  // 1. message_start
52
  yield formatSSE("message_start", {
53
  type: "message_start",
@@ -63,33 +102,22 @@ export async function* streamCodexToAnthropic(
63
  },
64
  });
65
 
66
- // 2. content_block_start for text block at index 0
67
- yield formatSSE("content_block_start", {
68
- type: "content_block_start",
69
- index: contentIndex,
70
- content_block: { type: "text", text: "" },
71
- });
72
- textBlockStarted = true;
73
 
74
- // 3. Process Codex stream events
75
  for await (const evt of iterateCodexEvents(codexApi, rawResponse)) {
76
  if (evt.responseId) onResponseId?.(evt.responseId);
77
 
78
  // Handle upstream error events
79
  if (evt.error) {
80
- // Close current text block if open
81
- if (textBlockStarted) {
82
- yield formatSSE("content_block_delta", {
83
- type: "content_block_delta",
84
- index: contentIndex,
85
- delta: { type: "text_delta", text: `[Error] ${evt.error.code}: ${evt.error.message}` },
86
- });
87
- yield formatSSE("content_block_stop", {
88
- type: "content_block_stop",
89
- index: contentIndex,
90
- });
91
- textBlockStarted = false;
92
- }
93
  yield formatSSE("error", {
94
  type: "error",
95
  error: { type: "api_error", message: `${evt.error.code}: ${evt.error.message}` },
@@ -98,20 +126,34 @@ export async function* streamCodexToAnthropic(
98
  return;
99
  }
100
 
101
- // Handle function call start → close text block, open tool_use block
102
- if (evt.functionCallStart) {
103
- hasToolCalls = true;
104
  hasContent = true;
105
-
106
- // Close text block if still open
107
- if (textBlockStarted) {
108
- yield formatSSE("content_block_stop", {
109
- type: "content_block_stop",
110
  index: contentIndex,
 
111
  });
112
- contentIndex++;
113
- textBlockStarted = false;
114
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
  // Start tool_use block
117
  yield formatSSE("content_block_start", {
@@ -159,15 +201,10 @@ export async function* streamCodexToAnthropic(
159
  case "response.output_text.delta": {
160
  if (evt.textDelta) {
161
  hasContent = true;
162
- // Reopen a text block if the previous one was closed (e.g. after tool calls)
163
- if (!textBlockStarted) {
164
- yield formatSSE("content_block_start", {
165
- type: "content_block_start",
166
- index: contentIndex,
167
- content_block: { type: "text", text: "" },
168
- });
169
- textBlockStarted = true;
170
- }
171
  yield formatSSE("content_block_delta", {
172
  type: "content_block_delta",
173
  index: contentIndex,
@@ -184,7 +221,8 @@ export async function* streamCodexToAnthropic(
184
  onUsage?.({ input_tokens: inputTokens, output_tokens: outputTokens });
185
  }
186
  // Inject error text if stream completed with no content
187
- if (!hasContent && textBlockStarted) {
 
188
  yield formatSSE("content_block_delta", {
189
  type: "content_block_delta",
190
  index: contentIndex,
@@ -196,22 +234,18 @@ export async function* streamCodexToAnthropic(
196
  }
197
  }
198
 
199
- // 4. Close text block if still open (no tool calls, or text came before tools)
200
- if (textBlockStarted) {
201
- yield formatSSE("content_block_stop", {
202
- type: "content_block_stop",
203
- index: contentIndex,
204
- });
205
- }
206
 
207
- // 5. message_delta with stop_reason and usage
208
  yield formatSSE("message_delta", {
209
  type: "message_delta",
210
  delta: { stop_reason: hasToolCalls ? "tool_use" : "end_turn" },
211
  usage: { input_tokens: inputTokens, output_tokens: outputTokens },
212
  });
213
 
214
- // 6. message_stop
215
  yield formatSSE("message_stop", {
216
  type: "message_stop",
217
  });
@@ -225,6 +259,7 @@ export async function collectCodexToAnthropicResponse(
225
  codexApi: CodexApi,
226
  rawResponse: Response,
227
  model: string,
 
228
  ): Promise<{
229
  response: AnthropicMessagesResponse;
230
  usage: AnthropicUsageInfo;
@@ -232,6 +267,7 @@ export async function collectCodexToAnthropicResponse(
232
  }> {
233
  const id = `msg_${randomUUID().replace(/-/g, "").slice(0, 24)}`;
234
  let fullText = "";
 
235
  let inputTokens = 0;
236
  let outputTokens = 0;
237
  let responseId: string | null = null;
@@ -245,6 +281,7 @@ export async function collectCodexToAnthropicResponse(
245
  throw new Error(`Codex API error: ${evt.error.code}: ${evt.error.message}`);
246
  }
247
  if (evt.textDelta) fullText += evt.textDelta;
 
248
  if (evt.usage) {
249
  inputTokens = evt.usage.input_tokens;
250
  outputTokens = evt.usage.output_tokens;
@@ -270,6 +307,10 @@ export async function collectCodexToAnthropicResponse(
270
 
271
  const hasToolCalls = toolUseBlocks.length > 0;
272
  const content: AnthropicContentBlock[] = [];
 
 
 
 
273
  if (fullText) {
274
  content.push({ type: "text", text: fullText });
275
  }
 
3
  *
4
  * Codex SSE events:
5
  * response.created → extract response ID
6
+ * response.reasoning_summary_text.delta → thinking block (if wantThinking)
7
  * response.output_text.delta → content_block_delta (text_delta)
8
  * response.completed → content_block_stop + message_delta + message_stop
9
  *
 
32
  /**
33
  * Stream Codex Responses API events as Anthropic Messages SSE.
34
  * Yields string chunks ready to write to the HTTP response.
35
+ *
36
+ * When wantThinking is true, reasoning summary deltas are emitted as
37
+ * thinking content blocks before the text block.
38
  */
39
  export async function* streamCodexToAnthropic(
40
  codexApi: CodexApi,
 
42
  model: string,
43
  onUsage?: (usage: AnthropicUsageInfo) => void,
44
  onResponseId?: (id: string) => void,
45
+ wantThinking?: boolean,
46
  ): AsyncGenerator<string> {
47
  const msgId = `msg_${randomUUID().replace(/-/g, "").slice(0, 24)}`;
48
  let outputTokens = 0;
 
51
  let hasContent = false;
52
  let contentIndex = 0;
53
  let textBlockStarted = false;
54
+ let thinkingBlockStarted = false;
55
  const callIdsWithDeltas = new Set<string>();
56
 
57
+ // Helper: close an open block and advance the index
58
+ function* closeBlock(blockType: "thinking" | "text"): Generator<string> {
59
+ yield formatSSE("content_block_stop", {
60
+ type: "content_block_stop",
61
+ index: contentIndex,
62
+ });
63
+ contentIndex++;
64
+ if (blockType === "thinking") thinkingBlockStarted = false;
65
+ else textBlockStarted = false;
66
+ }
67
+
68
+ // Helper: ensure thinking block is closed before a non-thinking block
69
+ function* closeThinkingIfOpen(): Generator<string> {
70
+ if (thinkingBlockStarted) yield* closeBlock("thinking");
71
+ }
72
+
73
+ // Helper: ensure text block is closed
74
+ function* closeTextIfOpen(): Generator<string> {
75
+ if (textBlockStarted) yield* closeBlock("text");
76
+ }
77
+
78
+ // Helper: ensure a text block is open
79
+ function* ensureTextBlock(): Generator<string> {
80
+ if (!textBlockStarted) {
81
+ yield formatSSE("content_block_start", {
82
+ type: "content_block_start",
83
+ index: contentIndex,
84
+ content_block: { type: "text", text: "" },
85
+ });
86
+ textBlockStarted = true;
87
+ }
88
+ }
89
+
90
  // 1. message_start
91
  yield formatSSE("message_start", {
92
  type: "message_start",
 
102
  },
103
  });
104
 
105
+ // Don't eagerly open a text block — wait for actual content so thinking can come first
 
 
 
 
 
 
106
 
107
+ // 2. Process Codex stream events
108
  for await (const evt of iterateCodexEvents(codexApi, rawResponse)) {
109
  if (evt.responseId) onResponseId?.(evt.responseId);
110
 
111
  // Handle upstream error events
112
  if (evt.error) {
113
+ yield* closeThinkingIfOpen();
114
+ yield* ensureTextBlock();
115
+ yield formatSSE("content_block_delta", {
116
+ type: "content_block_delta",
117
+ index: contentIndex,
118
+ delta: { type: "text_delta", text: `[Error] ${evt.error.code}: ${evt.error.message}` },
119
+ });
120
+ yield* closeBlock("text");
 
 
 
 
 
121
  yield formatSSE("error", {
122
  type: "error",
123
  error: { type: "api_error", message: `${evt.error.code}: ${evt.error.message}` },
 
126
  return;
127
  }
128
 
129
+ // Handle reasoning delta → thinking block (only if client wants thinking)
130
+ if (evt.reasoningDelta && wantThinking) {
 
131
  hasContent = true;
132
+ yield* closeTextIfOpen();
133
+ // Open thinking block if not already open
134
+ if (!thinkingBlockStarted) {
135
+ yield formatSSE("content_block_start", {
136
+ type: "content_block_start",
137
  index: contentIndex,
138
+ content_block: { type: "thinking", thinking: "" },
139
  });
140
+ thinkingBlockStarted = true;
 
141
  }
142
+ yield formatSSE("content_block_delta", {
143
+ type: "content_block_delta",
144
+ index: contentIndex,
145
+ delta: { type: "thinking_delta", thinking: evt.reasoningDelta },
146
+ });
147
+ continue;
148
+ }
149
+
150
+ // Handle function call start → close open blocks, open tool_use block
151
+ if (evt.functionCallStart) {
152
+ hasToolCalls = true;
153
+ hasContent = true;
154
+
155
+ yield* closeThinkingIfOpen();
156
+ yield* closeTextIfOpen();
157
 
158
  // Start tool_use block
159
  yield formatSSE("content_block_start", {
 
201
  case "response.output_text.delta": {
202
  if (evt.textDelta) {
203
  hasContent = true;
204
+ // Close thinking block if open (transition from thinking → text)
205
+ yield* closeThinkingIfOpen();
206
+ // Open a text block if not already open
207
+ yield* ensureTextBlock();
 
 
 
 
 
208
  yield formatSSE("content_block_delta", {
209
  type: "content_block_delta",
210
  index: contentIndex,
 
221
  onUsage?.({ input_tokens: inputTokens, output_tokens: outputTokens });
222
  }
223
  // Inject error text if stream completed with no content
224
+ if (!hasContent) {
225
+ yield* ensureTextBlock();
226
  yield formatSSE("content_block_delta", {
227
  type: "content_block_delta",
228
  index: contentIndex,
 
234
  }
235
  }
236
 
237
+ // 3. Close any open blocks
238
+ yield* closeThinkingIfOpen();
239
+ yield* closeTextIfOpen();
 
 
 
 
240
 
241
+ // 4. message_delta with stop_reason and usage
242
  yield formatSSE("message_delta", {
243
  type: "message_delta",
244
  delta: { stop_reason: hasToolCalls ? "tool_use" : "end_turn" },
245
  usage: { input_tokens: inputTokens, output_tokens: outputTokens },
246
  });
247
 
248
+ // 5. message_stop
249
  yield formatSSE("message_stop", {
250
  type: "message_stop",
251
  });
 
259
  codexApi: CodexApi,
260
  rawResponse: Response,
261
  model: string,
262
+ wantThinking?: boolean,
263
  ): Promise<{
264
  response: AnthropicMessagesResponse;
265
  usage: AnthropicUsageInfo;
 
267
  }> {
268
  const id = `msg_${randomUUID().replace(/-/g, "").slice(0, 24)}`;
269
  let fullText = "";
270
+ let fullReasoning = "";
271
  let inputTokens = 0;
272
  let outputTokens = 0;
273
  let responseId: string | null = null;
 
281
  throw new Error(`Codex API error: ${evt.error.code}: ${evt.error.message}`);
282
  }
283
  if (evt.textDelta) fullText += evt.textDelta;
284
+ if (evt.reasoningDelta) fullReasoning += evt.reasoningDelta;
285
  if (evt.usage) {
286
  inputTokens = evt.usage.input_tokens;
287
  outputTokens = evt.usage.output_tokens;
 
307
 
308
  const hasToolCalls = toolUseBlocks.length > 0;
309
  const content: AnthropicContentBlock[] = [];
310
+ // Thinking block comes first if requested and available
311
+ if (wantThinking && fullReasoning) {
312
+ content.push({ type: "thinking", thinking: fullReasoning });
313
+ }
314
  if (fullText) {
315
  content.push({ type: "text", text: fullText });
316
  }
src/translation/codex-to-openai.ts CHANGED
@@ -38,6 +38,7 @@ export async function* streamCodexToOpenAI(
38
  model: string,
39
  onUsage?: (usage: UsageInfo) => void,
40
  onResponseId?: (id: string) => void,
 
41
  ): AsyncGenerator<string> {
42
  const chunkId = `chatcmpl-${randomUUID().replace(/-/g, "").slice(0, 24)}`;
43
  const created = Math.floor(Date.now() / 1000);
@@ -182,6 +183,24 @@ export async function* streamCodexToOpenAI(
182
  continue;
183
  }
184
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  switch (evt.typed.type) {
186
  case "response.output_text.delta": {
187
  if (evt.textDelta) {
@@ -251,10 +270,12 @@ export async function collectCodexResponse(
251
  codexApi: CodexApi,
252
  rawResponse: Response,
253
  model: string,
 
254
  ): Promise<{ response: ChatCompletionResponse; usage: UsageInfo; responseId: string | null }> {
255
  const id = `chatcmpl-${randomUUID().replace(/-/g, "").slice(0, 24)}`;
256
  const created = Math.floor(Date.now() / 1000);
257
  let fullText = "";
 
258
  let promptTokens = 0;
259
  let completionTokens = 0;
260
  let responseId: string | null = null;
@@ -268,6 +289,7 @@ export async function collectCodexResponse(
268
  throw new Error(`Codex API error: ${evt.error.code}: ${evt.error.message}`);
269
  }
270
  if (evt.textDelta) fullText += evt.textDelta;
 
271
  if (evt.usage) {
272
  promptTokens = evt.usage.input_tokens;
273
  completionTokens = evt.usage.output_tokens;
@@ -294,6 +316,9 @@ export async function collectCodexResponse(
294
  role: "assistant",
295
  content: fullText || null,
296
  };
 
 
 
297
  if (hasToolCalls) {
298
  message.tool_calls = toolCalls;
299
  }
 
38
  model: string,
39
  onUsage?: (usage: UsageInfo) => void,
40
  onResponseId?: (id: string) => void,
41
+ wantReasoning?: boolean,
42
  ): AsyncGenerator<string> {
43
  const chunkId = `chatcmpl-${randomUUID().replace(/-/g, "").slice(0, 24)}`;
44
  const created = Math.floor(Date.now() / 1000);
 
183
  continue;
184
  }
185
 
186
+ // Emit reasoning delta if client requested it
187
+ if (evt.reasoningDelta && wantReasoning) {
188
+ hasContent = true;
189
+ yield formatSSE({
190
+ id: chunkId,
191
+ object: "chat.completion.chunk",
192
+ created,
193
+ model,
194
+ choices: [
195
+ {
196
+ index: 0,
197
+ delta: { reasoning_content: evt.reasoningDelta },
198
+ finish_reason: null,
199
+ },
200
+ ],
201
+ });
202
+ }
203
+
204
  switch (evt.typed.type) {
205
  case "response.output_text.delta": {
206
  if (evt.textDelta) {
 
270
  codexApi: CodexApi,
271
  rawResponse: Response,
272
  model: string,
273
+ wantReasoning?: boolean,
274
  ): Promise<{ response: ChatCompletionResponse; usage: UsageInfo; responseId: string | null }> {
275
  const id = `chatcmpl-${randomUUID().replace(/-/g, "").slice(0, 24)}`;
276
  const created = Math.floor(Date.now() / 1000);
277
  let fullText = "";
278
+ let fullReasoning = "";
279
  let promptTokens = 0;
280
  let completionTokens = 0;
281
  let responseId: string | null = null;
 
289
  throw new Error(`Codex API error: ${evt.error.code}: ${evt.error.message}`);
290
  }
291
  if (evt.textDelta) fullText += evt.textDelta;
292
+ if (evt.reasoningDelta) fullReasoning += evt.reasoningDelta;
293
  if (evt.usage) {
294
  promptTokens = evt.usage.input_tokens;
295
  completionTokens = evt.usage.output_tokens;
 
316
  role: "assistant",
317
  content: fullText || null,
318
  };
319
+ if (wantReasoning && fullReasoning) {
320
+ message.reasoning_content = fullReasoning;
321
+ }
322
  if (hasToolCalls) {
323
  message.tool_calls = toolCalls;
324
  }
src/translation/gemini-to-codex.ts CHANGED
@@ -217,7 +217,7 @@ export function translateGeminiToCodexRequest(
217
  request.previous_response_id = previousResponseId;
218
  }
219
 
220
- // Add reasoning effort: thinkingBudget → model default → config default
221
  const thinkingEffort = budgetToEffort(
222
  req.generationConfig?.thinkingConfig?.thinkingBudget,
223
  );
@@ -225,9 +225,7 @@ export function translateGeminiToCodexRequest(
225
  thinkingEffort ??
226
  modelInfo?.defaultReasoningEffort ??
227
  config.model.default_reasoning_effort;
228
- if (effort) {
229
- request.reasoning = { effort };
230
- }
231
 
232
  return request;
233
  }
 
217
  request.previous_response_id = previousResponseId;
218
  }
219
 
220
+ // Always request reasoning summary (translation layer filters output on demand)
221
  const thinkingEffort = budgetToEffort(
222
  req.generationConfig?.thinkingConfig?.thinkingBudget,
223
  );
 
225
  thinkingEffort ??
226
  modelInfo?.defaultReasoningEffort ??
227
  config.model.default_reasoning_effort;
228
+ request.reasoning = { summary: "auto", ...(effort ? { effort } : {}) };
 
 
229
 
230
  return request;
231
  }
src/translation/openai-to-codex.ts CHANGED
@@ -179,14 +179,12 @@ export function translateToCodexRequest(
179
  request.previous_response_id = previousResponseId;
180
  }
181
 
182
- // Add reasoning effort if applicable
183
  const effort =
184
  req.reasoning_effort ??
185
  modelInfo?.defaultReasoningEffort ??
186
  config.model.default_reasoning_effort;
187
- if (effort) {
188
- request.reasoning = { effort };
189
- }
190
 
191
  return request;
192
  }
 
179
  request.previous_response_id = previousResponseId;
180
  }
181
 
182
+ // Always request reasoning summary (translation layer filters output on demand)
183
  const effort =
184
  req.reasoning_effort ??
185
  modelInfo?.defaultReasoningEffort ??
186
  config.model.default_reasoning_effort;
187
+ request.reasoning = { summary: "auto", ...(effort ? { effort } : {}) };
 
 
188
 
189
  return request;
190
  }
src/types/codex-events.ts CHANGED
@@ -43,6 +43,18 @@ export interface CodexCompletedEvent {
43
  response: CodexResponseData;
44
  }
45
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  // ── Function call event data shapes ─────────────────────────────
47
 
48
  export interface CodexOutputItemAddedEvent {
@@ -91,6 +103,8 @@ export type TypedCodexEvent =
91
  | CodexInProgressEvent
92
  | CodexTextDeltaEvent
93
  | CodexTextDoneEvent
 
 
94
  | CodexCompletedEvent
95
  | CodexOutputItemAddedEvent
96
  | CodexFunctionCallArgsDeltaEvent
@@ -152,6 +166,18 @@ export function parseCodexEvent(evt: CodexSSEEvent): TypedCodexEvent {
152
  }
153
  return { type: "unknown", raw: data };
154
  }
 
 
 
 
 
 
 
 
 
 
 
 
155
  case "response.completed": {
156
  const resp = parseResponseData(data);
157
  return resp
 
43
  response: CodexResponseData;
44
  }
45
 
46
+ // ── Reasoning summary event data shapes ─────────────────────────
47
+
48
+ export interface CodexReasoningSummaryDeltaEvent {
49
+ type: "response.reasoning_summary_text.delta";
50
+ delta: string;
51
+ }
52
+
53
+ export interface CodexReasoningSummaryDoneEvent {
54
+ type: "response.reasoning_summary_text.done";
55
+ text: string;
56
+ }
57
+
58
  // ── Function call event data shapes ─────────────────────────────
59
 
60
  export interface CodexOutputItemAddedEvent {
 
103
  | CodexInProgressEvent
104
  | CodexTextDeltaEvent
105
  | CodexTextDoneEvent
106
+ | CodexReasoningSummaryDeltaEvent
107
+ | CodexReasoningSummaryDoneEvent
108
  | CodexCompletedEvent
109
  | CodexOutputItemAddedEvent
110
  | CodexFunctionCallArgsDeltaEvent
 
166
  }
167
  return { type: "unknown", raw: data };
168
  }
169
+ case "response.reasoning_summary_text.delta": {
170
+ if (isRecord(data) && typeof data.delta === "string") {
171
+ return { type: "response.reasoning_summary_text.delta", delta: data.delta };
172
+ }
173
+ return { type: "unknown", raw: data };
174
+ }
175
+ case "response.reasoning_summary_text.done": {
176
+ if (isRecord(data) && typeof data.text === "string") {
177
+ return { type: "response.reasoning_summary_text.done", text: data.text };
178
+ }
179
+ return { type: "unknown", raw: data };
180
+ }
181
  case "response.completed": {
182
  const resp = parseResponseData(data);
183
  return resp
src/types/openai.ts CHANGED
@@ -90,6 +90,7 @@ export interface ChatCompletionChoice {
90
  message: {
91
  role: "assistant";
92
  content: string | null;
 
93
  tool_calls?: ChatCompletionToolCall[];
94
  };
95
  finish_reason: "stop" | "length" | "tool_calls" | "function_call" | null;
@@ -125,6 +126,7 @@ export interface ChatCompletionChunkToolCall {
125
  export interface ChatCompletionChunkDelta {
126
  role?: "assistant";
127
  content?: string | null;
 
128
  tool_calls?: ChatCompletionChunkToolCall[];
129
  }
130
 
 
90
  message: {
91
  role: "assistant";
92
  content: string | null;
93
+ reasoning_content?: string | null;
94
  tool_calls?: ChatCompletionToolCall[];
95
  };
96
  finish_reason: "stop" | "length" | "tool_calls" | "function_call" | null;
 
126
  export interface ChatCompletionChunkDelta {
127
  role?: "assistant";
128
  content?: string | null;
129
+ reasoning_content?: string | null;
130
  tool_calls?: ChatCompletionChunkToolCall[];
131
  }
132