Spaces: Running on Zero
adds debug
Browse files
app.py
CHANGED
|
@@ -220,11 +220,46 @@ def generate_response(message, history, system_message, max_tokens, temperature,
|
|
| 220 |
eos_token_id=tokenizer.eos_token_id,
|
| 221 |
# cache_implementation="static"
|
| 222 |
)
|
| 223 |
-
response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
assistant_response = response[len(full_prompt):].strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
assistant_response = re.sub(r'<\|im_start\|>.*?<\|im_end\|>', '', assistant_response, flags=re.DOTALL)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
if not enable_thinking:
|
| 227 |
assistant_response = re.sub(r'<think>.*?</think>', '', assistant_response, flags=re.DOTALL)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
|
| 229 |
# Handle tool calls if present
|
| 230 |
if parsed_tools and ("<tool_call>" in assistant_response or "<code>" in assistant_response):
|
|
@@ -239,8 +274,19 @@ def generate_response(message, history, system_message, max_tokens, temperature,
|
|
| 239 |
code_call = code_match.group(1)
|
| 240 |
assistant_response += f"\n\n🐍 Python Tool Call: {code_call}\n\nNote: This is a simulated Python tool call. In a real scenario, the function would be executed and its output would be used to generate a final response."
|
| 241 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
assistant_response = assistant_response.strip()
|
| 243 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
return assistant_response
|
| 245 |
|
| 246 |
def user(user_message, history):
|
|
@@ -308,8 +354,8 @@ with gr.Blocks() as demo:
|
|
| 308 |
max_length = gr.Slider(
|
| 309 |
label="📏 Longueur de la réponse",
|
| 310 |
minimum=10,
|
| 311 |
-
maximum=
|
| 312 |
-
value=
|
| 313 |
step=1
|
| 314 |
)
|
| 315 |
temperature = gr.Slider(
|
|
|
|
| 220 |
eos_token_id=tokenizer.eos_token_id,
|
| 221 |
# cache_implementation="static"
|
| 222 |
)
|
| 223 |
+
response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
| 224 |
+
|
| 225 |
+
# Debug: Print the full raw response
|
| 226 |
+
logger.info(f"=== FULL RAW RESPONSE DEBUG ===")
|
| 227 |
+
logger.info(f"Raw response length: {len(response)}")
|
| 228 |
+
logger.info(f"Raw response: {repr(response)}")
|
| 229 |
+
logger.info(f"Full prompt length: {len(full_prompt)}")
|
| 230 |
+
logger.info(f"Full prompt: {repr(full_prompt)}")
|
| 231 |
+
|
| 232 |
assistant_response = response[len(full_prompt):].strip()
|
| 233 |
+
|
| 234 |
+
# Debug: Print the extracted assistant response
|
| 235 |
+
logger.info(f"=== EXTRACTED ASSISTANT RESPONSE DEBUG ===")
|
| 236 |
+
logger.info(f"Extracted response length: {len(assistant_response)}")
|
| 237 |
+
logger.info(f"Extracted response: {repr(assistant_response)}")
|
| 238 |
+
|
| 239 |
+
# Debug: Print before cleanup
|
| 240 |
+
logger.info(f"=== BEFORE CLEANUP DEBUG ===")
|
| 241 |
+
logger.info(f"Before cleanup length: {len(assistant_response)}")
|
| 242 |
+
logger.info(f"Before cleanup: {repr(assistant_response)}")
|
| 243 |
+
|
| 244 |
assistant_response = re.sub(r'<\|im_start\|>.*?<\|im_end\|>', '', assistant_response, flags=re.DOTALL)
|
| 245 |
+
|
| 246 |
+
# Debug: Print after first cleanup
|
| 247 |
+
logger.info(f"=== AFTER FIRST CLEANUP DEBUG ===")
|
| 248 |
+
logger.info(f"After first cleanup length: {len(assistant_response)}")
|
| 249 |
+
logger.info(f"After first cleanup: {repr(assistant_response)}")
|
| 250 |
+
|
| 251 |
if not enable_thinking:
|
| 252 |
assistant_response = re.sub(r'<think>.*?</think>', '', assistant_response, flags=re.DOTALL)
|
| 253 |
+
|
| 254 |
+
# Debug: Print after thinking cleanup
|
| 255 |
+
logger.info(f"=== AFTER THINKING CLEANUP DEBUG ===")
|
| 256 |
+
logger.info(f"After thinking cleanup length: {len(assistant_response)}")
|
| 257 |
+
logger.info(f"After thinking cleanup: {repr(assistant_response)}")
|
| 258 |
+
|
| 259 |
+
# Debug: Print before tool call handling
|
| 260 |
+
logger.info(f"=== BEFORE TOOL CALL HANDLING DEBUG ===")
|
| 261 |
+
logger.info(f"Before tool call handling length: {len(assistant_response)}")
|
| 262 |
+
logger.info(f"Before tool call handling: {repr(assistant_response)}")
|
| 263 |
|
| 264 |
# Handle tool calls if present
|
| 265 |
if parsed_tools and ("<tool_call>" in assistant_response or "<code>" in assistant_response):
|
|
|
|
| 274 |
code_call = code_match.group(1)
|
| 275 |
assistant_response += f"\n\n🐍 Python Tool Call: {code_call}\n\nNote: This is a simulated Python tool call. In a real scenario, the function would be executed and its output would be used to generate a final response."
|
| 276 |
|
| 277 |
+
# Debug: Print after tool call handling
|
| 278 |
+
logger.info(f"=== AFTER TOOL CALL HANDLING DEBUG ===")
|
| 279 |
+
logger.info(f"After tool call handling length: {len(assistant_response)}")
|
| 280 |
+
logger.info(f"After tool call handling: {repr(assistant_response)}")
|
| 281 |
+
|
| 282 |
assistant_response = assistant_response.strip()
|
| 283 |
|
| 284 |
+
# Debug: Print final response
|
| 285 |
+
logger.info(f"=== FINAL RESPONSE DEBUG ===")
|
| 286 |
+
logger.info(f"Final response length: {len(assistant_response)}")
|
| 287 |
+
logger.info(f"Final response: {repr(assistant_response)}")
|
| 288 |
+
logger.info(f"=== END DEBUG ===")
|
| 289 |
+
|
| 290 |
return assistant_response
|
| 291 |
|
| 292 |
def user(user_message, history):
|
|
|
|
| 354 |
max_length = gr.Slider(
|
| 355 |
label="📏 Longueur de la réponse",
|
| 356 |
minimum=10,
|
| 357 |
+
maximum=9000, # maximum=32768,
|
| 358 |
+
value=1256,
|
| 359 |
step=1
|
| 360 |
)
|
| 361 |
temperature = gr.Slider(
|