Tonic committed
Commit 9413e52 · 1 Parent(s): 7578eea

adds special tokens strip with fallback

Files changed (1)
  1. app.py +40 -10
app.py CHANGED
@@ -220,19 +220,49 @@ def generate_response(message, history, system_message, max_tokens, temperature,
         eos_token_id=tokenizer.eos_token_id,
         # cache_implementation="static"
     )
-    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+    # First decode WITH special tokens to find markers
+    response_with_tokens = tokenizer.decode(output_ids[0], skip_special_tokens=False)
 
-    # Debug: Print the full raw response
-    logger.info(f"=== FULL RAW RESPONSE DEBUG ===")
-    logger.info(f"Raw response length: {len(response)}")
-    logger.info(f"Raw response: {repr(response)}")
-    logger.info(f"Full prompt length: {len(full_prompt)}")
-    logger.info(f"Full prompt: {repr(full_prompt)}")
+    # Debug: Print the full raw response with tokens
+    logger.info(f"=== FULL RAW RESPONSE WITH TOKENS DEBUG ===")
+    logger.info(f"Raw response with tokens length: {len(response_with_tokens)}")
+    logger.info(f"Raw response with tokens: {repr(response_with_tokens)}")
 
-    assistant_response = response[len(full_prompt):].strip()
+    # More robust response extraction - look for assistant marker
+    logger.info(f"Looking for assistant marker in response...")
+    if "<|im_start|>assistant" in response_with_tokens:
+        logger.info(f"Found assistant marker in response")
+        # Find the start of assistant response
+        assistant_start = response_with_tokens.find("<|im_start|>assistant")
+        logger.info(f"Assistant marker found at position: {assistant_start}")
+        if assistant_start != -1:
+            # Find the end of the assistant marker
+            marker_end = response_with_tokens.find("\n", assistant_start)
+            logger.info(f"Marker end found at position: {marker_end}")
+            if marker_end != -1:
+                assistant_response = response_with_tokens[marker_end + 1:].strip()
+                logger.info(f"Using marker-based extraction")
+            else:
+                assistant_response = response_with_tokens[assistant_start + len("<|im_start|>assistant"):].strip()
+                logger.info(f"Using fallback marker extraction")
+        else:
+            # Fallback to prompt-based extraction
+            response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+            assistant_response = response[len(full_prompt):].strip()
+            logger.info(f"Using prompt-based extraction (marker not found)")
+    else:
+        # Fallback to original method
+        logger.info(f"No assistant marker found, using prompt-based extraction")
+        response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+        assistant_response = response[len(full_prompt):].strip()
+
+    # Clean up any remaining special tokens
+    assistant_response = re.sub(r'<\|im_start\|>.*?<\|im_end\|>', '', assistant_response, flags=re.DOTALL)
+    assistant_response = re.sub(r'<\|im_start\|>', '', assistant_response)
+    assistant_response = re.sub(r'<\|im_end\|>', '', assistant_response)
 
-    # Debug: Print the extracted assistant response
-    logger.info(f"=== EXTRACTED ASSISTANT RESPONSE DEBUG ===")
+    # Debug: Print the extracted assistant response after cleanup
+    logger.info(f"=== EXTRACTED ASSISTANT RESPONSE AFTER CLEANUP DEBUG ===")
     logger.info(f"Extracted response length: {len(assistant_response)}")
     logger.info(f"Extracted response: {repr(assistant_response)}")
 
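
For reference, below is a minimal, self-contained sketch of the same extract-with-fallback logic as a standalone helper, without the debug logging. The helper name extract_assistant_response and its signature are illustrative only (they do not appear in app.py); the sketch assumes a ChatML-style template where the assistant turn is introduced by "<|im_start|>assistant" and closed by "<|im_end|>".

import re

IM_START = "<|im_start|>"
IM_END = "<|im_end|>"

def extract_assistant_response(decoded_with_tokens: str, decoded_plain: str, prompt: str) -> str:
    """Extract the assistant turn from a decoded generation.

    Prefers the ChatML-style assistant marker; falls back to stripping the
    prompt prefix when no marker is present.
    """
    marker = IM_START + "assistant"
    start = decoded_with_tokens.find(marker)
    if start != -1:
        # Skip past the marker line ("<|im_start|>assistant\n") when a newline follows it.
        newline = decoded_with_tokens.find("\n", start)
        if newline != -1:
            text = decoded_with_tokens[newline + 1:]
        else:
            text = decoded_with_tokens[start + len(marker):]
    else:
        # Fallback: no marker found, so slice the prompt prefix off the plain decode.
        text = decoded_plain[len(prompt):]

    # Strip any remaining special tokens, mirroring the regex cleanup in the commit.
    text = re.sub(r'<\|im_start\|>.*?<\|im_end\|>', '', text, flags=re.DOTALL)
    text = text.replace(IM_START, '').replace(IM_END, '')
    return text.strip()

Inside generate_response this would be called along the lines of:
assistant_response = extract_assistant_response(response_with_tokens, tokenizer.decode(output_ids[0], skip_special_tokens=True), full_prompt)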