Spaces:
				
			
			
	
			
			
		Running
		
			on 
			
			Zero
	
	
	
			
			
	
	
	
	
		
		
		Running
		
			on 
			
			Zero
	Add special-token stripping with a prompt-based fallback
Browse files
    	
        app.py
    CHANGED
    
    | @@ -220,19 +220,49 @@ def generate_response(message, history, system_message, max_tokens, temperature, | |
| 220 | 
             
                        eos_token_id=tokenizer.eos_token_id,
         | 
| 221 | 
             
                        # cache_implementation="static"
         | 
| 222 | 
             
                    )
         | 
| 223 | 
            -
                     | 
|  | |
| 224 |  | 
| 225 | 
            -
                    # Debug: Print the full raw response
         | 
| 226 | 
            -
                    logger.info(f"=== FULL RAW RESPONSE DEBUG ===")
         | 
| 227 | 
            -
                    logger.info(f"Raw response length: {len(response)}")
         | 
| 228 | 
            -
                    logger.info(f"Raw response: {repr(response)}")
         | 
| 229 | 
            -
                    logger.info(f"Full prompt length: {len(full_prompt)}")
         | 
| 230 | 
            -
                    logger.info(f"Full prompt: {repr(full_prompt)}")
         | 
| 231 |  | 
| 232 | 
            -
                     | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 233 |  | 
| 234 | 
            -
                    # Debug: Print the extracted assistant response
         | 
| 235 | 
            -
                    logger.info(f"=== EXTRACTED ASSISTANT RESPONSE DEBUG ===")
         | 
| 236 | 
             
                    logger.info(f"Extracted response length: {len(assistant_response)}")
         | 
| 237 | 
             
                    logger.info(f"Extracted response: {repr(assistant_response)}")
         | 
| 238 |  | 
|  | |
| 220 | 
             
                        eos_token_id=tokenizer.eos_token_id,
         | 
| 221 | 
             
                        # cache_implementation="static"
         | 
| 222 | 
             
                    )
         | 
| 223 | 
            +
                    # First decode WITH special tokens to find markers
         | 
| 224 | 
            +
                    response_with_tokens = tokenizer.decode(output_ids[0], skip_special_tokens=False)
         | 
| 225 |  | 
| 226 | 
            +
                    # Debug: Print the full raw response with tokens
         | 
| 227 | 
            +
                    logger.info(f"=== FULL RAW RESPONSE WITH TOKENS DEBUG ===")
         | 
| 228 | 
            +
                    logger.info(f"Raw response with tokens length: {len(response_with_tokens)}")
         | 
| 229 | 
            +
                    logger.info(f"Raw response with tokens: {repr(response_with_tokens)}")
         | 
|  | |
|  | |
| 230 |  | 
| 231 | 
            +
                    # More robust response extraction - look for assistant marker
         | 
| 232 | 
            +
                    logger.info(f"Looking for assistant marker in response...")
         | 
| 233 | 
            +
                    if "<|im_start|>assistant" in response_with_tokens:
         | 
| 234 | 
            +
                        logger.info(f"Found assistant marker in response")
         | 
| 235 | 
            +
                        # Find the start of assistant response
         | 
| 236 | 
            +
                        assistant_start = response_with_tokens.find("<|im_start|>assistant")
         | 
| 237 | 
            +
                        logger.info(f"Assistant marker found at position: {assistant_start}")
         | 
| 238 | 
            +
                        if assistant_start != -1:
         | 
| 239 | 
            +
                            # Find the end of the assistant marker
         | 
| 240 | 
            +
                            marker_end = response_with_tokens.find("\n", assistant_start)
         | 
| 241 | 
            +
                            logger.info(f"Marker end found at position: {marker_end}")
         | 
| 242 | 
            +
                            if marker_end != -1:
         | 
| 243 | 
            +
                                assistant_response = response_with_tokens[marker_end + 1:].strip()
         | 
| 244 | 
            +
                                logger.info(f"Using marker-based extraction")
         | 
| 245 | 
            +
                            else:
         | 
| 246 | 
            +
                                assistant_response = response_with_tokens[assistant_start + len("<|im_start|>assistant"):].strip()
         | 
| 247 | 
            +
                                logger.info(f"Using fallback marker extraction")
         | 
| 248 | 
            +
                        else:
         | 
| 249 | 
            +
                            # Fallback to prompt-based extraction
         | 
| 250 | 
            +
                            response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
         | 
| 251 | 
            +
                            assistant_response = response[len(full_prompt):].strip()
         | 
| 252 | 
            +
                            logger.info(f"Using prompt-based extraction (marker not found)")
         | 
| 253 | 
            +
                    else:
         | 
| 254 | 
            +
                        # Fallback to original method
         | 
| 255 | 
            +
                        logger.info(f"No assistant marker found, using prompt-based extraction")
         | 
| 256 | 
            +
                        response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
         | 
| 257 | 
            +
                        assistant_response = response[len(full_prompt):].strip()
         | 
| 258 | 
            +
                    
         | 
| 259 | 
            +
                    # Clean up any remaining special tokens
         | 
| 260 | 
            +
                    assistant_response = re.sub(r'<\|im_start\|>.*?<\|im_end\|>', '', assistant_response, flags=re.DOTALL)
         | 
| 261 | 
            +
                    assistant_response = re.sub(r'<\|im_start\|>', '', assistant_response)
         | 
| 262 | 
            +
                    assistant_response = re.sub(r'<\|im_end\|>', '', assistant_response)
         | 
| 263 |  | 
| 264 | 
            +
                    # Debug: Print the extracted assistant response after cleanup
         | 
| 265 | 
            +
                    logger.info(f"=== EXTRACTED ASSISTANT RESPONSE AFTER CLEANUP DEBUG ===")
         | 
| 266 | 
             
                    logger.info(f"Extracted response length: {len(assistant_response)}")
         | 
| 267 | 
             
                    logger.info(f"Extracted response: {repr(assistant_response)}")
         | 
| 268 |  | 
