mtyrrell committed on
Commit
8a344c6
·
1 Parent(s): 1808f78

ts: file upload max context

Browse files
Files changed (1) hide show
  1. app/nodes.py +36 -5
app/nodes.py CHANGED
@@ -198,18 +198,29 @@ async def generate_node_streaming(state: GraphState) -> Generator[GraphState, No
198
  try:
199
  # Get MAX_CONTEXT_CHARS at the beginning so it's available throughout the function
200
  MAX_CONTEXT_CHARS = int(config.get("general", "MAX_CONTEXT_CHARS"))
 
201
 
202
  # Combine retriever context with ingestor context
203
  retrieved_context = state.get("context", "")
204
  ingestor_context = state.get("ingestor_context", "")
205
 
 
 
206
  # Convert contexts to list format expected by generator
207
  context_list = []
 
208
 
209
  if ingestor_context:
 
 
 
 
 
 
 
210
  # Add ingestor context
211
  context_list.append({
212
- "answer": ingestor_context,
213
  "answer_metadata": {
214
  "filename": state.get("filename", "Uploaded Document"),
215
  "page": "Unknown",
@@ -217,11 +228,29 @@ async def generate_node_streaming(state: GraphState) -> Generator[GraphState, No
217
  "source": "Ingestor"
218
  }
219
  })
 
220
 
221
- if retrieved_context:
222
  # Convert retrieved context to list and add
223
  retrieved_list = convert_context_to_list(retrieved_context)
224
- context_list.extend(retrieved_list)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
  # Prepare the request payload
227
  payload = {
@@ -285,7 +314,8 @@ async def generate_node_streaming(state: GraphState) -> Generator[GraphState, No
285
  "result_length": len(current_text),
286
  "generation_success": True,
287
  "streaming": True,
288
- "generator_type": "fastapi"
 
289
  })
290
 
291
  yield {
@@ -302,7 +332,8 @@ async def generate_node_streaming(state: GraphState) -> Generator[GraphState, No
302
  "result_length": len(current_text),
303
  "generation_success": True,
304
  "streaming": True,
305
- "generator_type": "fastapi"
 
306
  })
307
 
308
  yield {
 
198
  try:
199
  # Get MAX_CONTEXT_CHARS at the beginning so it's available throughout the function
200
  MAX_CONTEXT_CHARS = int(config.get("general", "MAX_CONTEXT_CHARS"))
201
+ logger.info(f"Using MAX_CONTEXT_CHARS: {MAX_CONTEXT_CHARS}")
202
 
203
  # Combine retriever context with ingestor context
204
  retrieved_context = state.get("context", "")
205
  ingestor_context = state.get("ingestor_context", "")
206
 
207
+ logger.info(f"Original context lengths - Ingestor: {len(ingestor_context)}, Retrieved: {len(retrieved_context)}")
208
+
209
  # Convert contexts to list format expected by generator
210
  context_list = []
211
+ total_context_chars = 0
212
 
213
  if ingestor_context:
214
+ # Truncate ingestor context if it's too long
215
+ if len(ingestor_context) > MAX_CONTEXT_CHARS:
216
+ logger.warning(f"Truncating ingestor context from {len(ingestor_context)} to {MAX_CONTEXT_CHARS} characters")
217
+ truncated_ingestor = ingestor_context[:MAX_CONTEXT_CHARS] + "...\n[Content truncated due to length]"
218
+ else:
219
+ truncated_ingestor = ingestor_context
220
+
221
  # Add ingestor context
222
  context_list.append({
223
+ "answer": truncated_ingestor,
224
  "answer_metadata": {
225
  "filename": state.get("filename", "Uploaded Document"),
226
  "page": "Unknown",
 
228
  "source": "Ingestor"
229
  }
230
  })
231
+ total_context_chars += len(truncated_ingestor)
232
 
233
+ if retrieved_context and total_context_chars < MAX_CONTEXT_CHARS:
234
  # Convert retrieved context to list and add
235
  retrieved_list = convert_context_to_list(retrieved_context)
236
+
237
+ # Add retrieved context items until we hit the limit
238
+ remaining_chars = MAX_CONTEXT_CHARS - total_context_chars
239
+ for item in retrieved_list:
240
+ item_text = item.get("answer", "")
241
+ if len(item_text) <= remaining_chars:
242
+ context_list.append(item)
243
+ remaining_chars -= len(item_text)
244
+ else:
245
+ # Truncate this item and stop
246
+ if remaining_chars > 100: # Only add if we have meaningful space left
247
+ item["answer"] = item_text[:remaining_chars-50] + "...\n[Content truncated]"
248
+ context_list.append(item)
249
+ break
250
+
251
+ # Calculate final context size
252
+ final_context_size = sum(len(item.get("answer", "")) for item in context_list)
253
+ logger.info(f"Final context size: {final_context_size} characters (limit: {MAX_CONTEXT_CHARS})")
254
 
255
  # Prepare the request payload
256
  payload = {
 
314
  "result_length": len(current_text),
315
  "generation_success": True,
316
  "streaming": True,
317
+ "generator_type": "fastapi",
318
+ "context_chars_used": final_context_size
319
  })
320
 
321
  yield {
 
332
  "result_length": len(current_text),
333
  "generation_success": True,
334
  "streaming": True,
335
+ "generator_type": "fastapi",
336
+ "context_chars_used": final_context_size
337
  })
338
 
339
  yield {