Tonic committed on
Commit
7578eea
·
1 Parent(s): 3436b88

adds debug

Browse files
Files changed (1) hide show
  1. app.py +49 -3
app.py CHANGED
@@ -220,11 +220,46 @@ def generate_response(message, history, system_message, max_tokens, temperature,
220
  eos_token_id=tokenizer.eos_token_id,
221
  # cache_implementation="static"
222
  )
223
- response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
 
 
 
 
 
 
 
224
  assistant_response = response[len(full_prompt):].strip()
 
 
 
 
 
 
 
 
 
 
 
225
  assistant_response = re.sub(r'<\|im_start\|>.*?<\|im_end\|>', '', assistant_response, flags=re.DOTALL)
 
 
 
 
 
 
226
  if not enable_thinking:
227
  assistant_response = re.sub(r'<think>.*?</think>', '', assistant_response, flags=re.DOTALL)
 
 
 
 
 
 
 
 
 
 
228
 
229
  # Handle tool calls if present
230
  if parsed_tools and ("<tool_call>" in assistant_response or "<code>" in assistant_response):
@@ -239,8 +274,19 @@ def generate_response(message, history, system_message, max_tokens, temperature,
239
  code_call = code_match.group(1)
240
  assistant_response += f"\n\n🐍 Python Tool Call: {code_call}\n\nNote: This is a simulated Python tool call. In a real scenario, the function would be executed and its output would be used to generate a final response."
241
 
 
 
 
 
 
242
  assistant_response = assistant_response.strip()
243
 
 
 
 
 
 
 
244
  return assistant_response
245
 
246
  def user(user_message, history):
@@ -308,8 +354,8 @@ with gr.Blocks() as demo:
308
  max_length = gr.Slider(
309
  label="📏 Longueur de la réponse",
310
  minimum=10,
311
- maximum=556, # maximum=32768,
312
- value=56,
313
  step=1
314
  )
315
  temperature = gr.Slider(
 
220
  eos_token_id=tokenizer.eos_token_id,
221
  # cache_implementation="static"
222
  )
223
+ response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
224
+
225
+ # Debug: Print the full raw response
226
+ logger.info(f"=== FULL RAW RESPONSE DEBUG ===")
227
+ logger.info(f"Raw response length: {len(response)}")
228
+ logger.info(f"Raw response: {repr(response)}")
229
+ logger.info(f"Full prompt length: {len(full_prompt)}")
230
+ logger.info(f"Full prompt: {repr(full_prompt)}")
231
+
232
  assistant_response = response[len(full_prompt):].strip()
233
+
234
+ # Debug: Print the extracted assistant response
235
+ logger.info(f"=== EXTRACTED ASSISTANT RESPONSE DEBUG ===")
236
+ logger.info(f"Extracted response length: {len(assistant_response)}")
237
+ logger.info(f"Extracted response: {repr(assistant_response)}")
238
+
239
+ # Debug: Print before cleanup
240
+ logger.info(f"=== BEFORE CLEANUP DEBUG ===")
241
+ logger.info(f"Before cleanup length: {len(assistant_response)}")
242
+ logger.info(f"Before cleanup: {repr(assistant_response)}")
243
+
244
  assistant_response = re.sub(r'<\|im_start\|>.*?<\|im_end\|>', '', assistant_response, flags=re.DOTALL)
245
+
246
+ # Debug: Print after first cleanup
247
+ logger.info(f"=== AFTER FIRST CLEANUP DEBUG ===")
248
+ logger.info(f"After first cleanup length: {len(assistant_response)}")
249
+ logger.info(f"After first cleanup: {repr(assistant_response)}")
250
+
251
  if not enable_thinking:
252
  assistant_response = re.sub(r'<think>.*?</think>', '', assistant_response, flags=re.DOTALL)
253
+
254
+ # Debug: Print after thinking cleanup
255
+ logger.info(f"=== AFTER THINKING CLEANUP DEBUG ===")
256
+ logger.info(f"After thinking cleanup length: {len(assistant_response)}")
257
+ logger.info(f"After thinking cleanup: {repr(assistant_response)}")
258
+
259
+ # Debug: Print before tool call handling
260
+ logger.info(f"=== BEFORE TOOL CALL HANDLING DEBUG ===")
261
+ logger.info(f"Before tool call handling length: {len(assistant_response)}")
262
+ logger.info(f"Before tool call handling: {repr(assistant_response)}")
263
 
264
  # Handle tool calls if present
265
  if parsed_tools and ("<tool_call>" in assistant_response or "<code>" in assistant_response):
 
274
  code_call = code_match.group(1)
275
  assistant_response += f"\n\n🐍 Python Tool Call: {code_call}\n\nNote: This is a simulated Python tool call. In a real scenario, the function would be executed and its output would be used to generate a final response."
276
 
277
+ # Debug: Print after tool call handling
278
+ logger.info(f"=== AFTER TOOL CALL HANDLING DEBUG ===")
279
+ logger.info(f"After tool call handling length: {len(assistant_response)}")
280
+ logger.info(f"After tool call handling: {repr(assistant_response)}")
281
+
282
  assistant_response = assistant_response.strip()
283
 
284
+ # Debug: Print final response
285
+ logger.info(f"=== FINAL RESPONSE DEBUG ===")
286
+ logger.info(f"Final response length: {len(assistant_response)}")
287
+ logger.info(f"Final response: {repr(assistant_response)}")
288
+ logger.info(f"=== END DEBUG ===")
289
+
290
  return assistant_response
291
 
292
  def user(user_message, history):
 
354
  max_length = gr.Slider(
355
  label="📏 Longueur de la réponse",
356
  minimum=10,
357
+ maximum=9000, # maximum=32768,
358
+ value=1256,
359
  step=1
360
  )
361
  temperature = gr.Slider(