Spaces:

Tonic
/

Petite-LLM-3

Running on Zero

App Files Files Community

Tonic committed on Jul 30

Commit

9413e52

1 Parent(s): 7578eea

Adds special-token stripping, with prompt-based extraction as a fallback

Browse files

Files changed (1) hide show

app.py +40 -10

app.py CHANGED Viewed

@@ -220,19 +220,49 @@ def generate_response(message, history, system_message, max_tokens, temperature,
             eos_token_id=tokenizer.eos_token_id,
             # cache_implementation="static"
         )
-        response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-        # Debug: Print the full raw response
-        logger.info(f"=== FULL RAW RESPONSE DEBUG ===")
-        logger.info(f"Raw response length: {len(response)}")
-        logger.info(f"Raw response: {repr(response)}")
-        logger.info(f"Full prompt length: {len(full_prompt)}")
-        logger.info(f"Full prompt: {repr(full_prompt)}")
-        assistant_response = response[len(full_prompt):].strip()
-        # Debug: Print the extracted assistant response
-        logger.info(f"=== EXTRACTED ASSISTANT RESPONSE DEBUG ===")
         logger.info(f"Extracted response length: {len(assistant_response)}")
         logger.info(f"Extracted response: {repr(assistant_response)}")

             eos_token_id=tokenizer.eos_token_id,
             # cache_implementation="static"
         )
+        # First decode WITH special tokens to find markers
+        response_with_tokens = tokenizer.decode(output_ids[0], skip_special_tokens=False)
+        # Debug: Print the full raw response with tokens
+        logger.info(f"=== FULL RAW RESPONSE WITH TOKENS DEBUG ===")
+        logger.info(f"Raw response with tokens length: {len(response_with_tokens)}")
+        logger.info(f"Raw response with tokens: {repr(response_with_tokens)}")
+        # More robust response extraction - look for assistant marker
+        logger.info(f"Looking for assistant marker in response...")
+        if "<|im_start|>assistant" in response_with_tokens:
+            logger.info(f"Found assistant marker in response")
+            # Find the start of assistant response
+            assistant_start = response_with_tokens.find("<|im_start|>assistant")
+            logger.info(f"Assistant marker found at position: {assistant_start}")
+            if assistant_start != -1:
+                # Find the end of the assistant marker
+                marker_end = response_with_tokens.find("\n", assistant_start)
+                logger.info(f"Marker end found at position: {marker_end}")
+                if marker_end != -1:
+                    assistant_response = response_with_tokens[marker_end + 1:].strip()
+                    logger.info(f"Using marker-based extraction")
+                else:
+                    assistant_response = response_with_tokens[assistant_start + len("<|im_start|>assistant"):].strip()
+                    logger.info(f"Using fallback marker extraction")
+            else:
+                # Fallback to prompt-based extraction
+                response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+                assistant_response = response[len(full_prompt):].strip()
+                logger.info(f"Using prompt-based extraction (marker not found)")
+        else:
+            # Fallback to original method
+            logger.info(f"No assistant marker found, using prompt-based extraction")
+            response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+            assistant_response = response[len(full_prompt):].strip()
+        # Clean up any remaining special tokens
+        assistant_response = re.sub(r'<\|im_start\|>.*?<\|im_end\|>', '', assistant_response, flags=re.DOTALL)
+        assistant_response = re.sub(r'<\|im_start\|>', '', assistant_response)
+        assistant_response = re.sub(r'<\|im_end\|>', '', assistant_response)
+        # Debug: Print the extracted assistant response after cleanup
+        logger.info(f"=== EXTRACTED ASSISTANT RESPONSE AFTER CLEANUP DEBUG ===")
         logger.info(f"Extracted response length: {len(assistant_response)}")
         logger.info(f"Extracted response: {repr(assistant_response)}")