fix: decoder
modules/ai_model.py CHANGED (+10 -4)
@@ -151,7 +151,7 @@ class AIModel:
         if input_type == "image" and isinstance(formatted_input, Image.Image):
             image_token = getattr(self.processor.tokenizer, 'image_token', '<image>')
             if image_token not in prompt:
-                prompt = f"{image_token}
+                prompt = f"{image_token}\n{prompt}"
         inputs = self.processor(
             text=prompt,
             images=formatted_input,

@@ -163,7 +163,13 @@ class AIModel:
             return_tensors="pt"
         ).to(self.model.device, dtype=torch.bfloat16)

-
+        if hasattr(inputs, 'input_ids') and inputs.input_ids.shape[-1] > 512:
+            log.warning(f"⚠️ Truncating over-long input: {inputs.input_ids.shape[-1]} -> 512")
+            inputs.input_ids = inputs.input_ids[:, :512]
+            if hasattr(inputs, 'attention_mask'):
+                inputs.attention_mask = inputs.attention_mask[:, :512]
+
+
         with torch.inference_mode():
             generation_args = {
                 "max_new_tokens": 512,

@@ -187,7 +193,8 @@ class AIModel:
             **inputs,
             **generation_args
         )
-
+        input_length = inputs.input_ids.shape[-1]
+        generated_tokens = outputs[0][input_length:]
         decoded = self.processor.tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

         return decoded if decoded else "I understand your question; please tell me more specific details."

@@ -205,7 +212,6 @@ class AIModel:

         if not self.is_available():
             log.error("Model not ready; cannot run chat_completion")
-            # For cases that require JSON output, return a valid JSON string describing the error
             if kwargs.get("response_format", {}).get("type") == "json_object":
                 return '{"error": "Model not available"}'
             return "Sorry, the AI model is currently unavailable."
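For context on the commit: decoder-only models return the prompt tokens followed by the newly generated tokens from generate(), so decoding outputs[0] directly echoes the prompt back into the reply; the fix slices the prompt off before decoding. Below is a minimal sketch of that decode path in isolation, assuming a transformers-style model/processor pair like the one in modules/ai_model.py. The function name, the max_input_tokens parameter, and the encode-time truncation kwargs are illustrative assumptions, not part of the commit:

    import torch

    def decode_new_tokens(model, processor, prompt, max_input_tokens=512):
        # Hypothetical helper, not from the commit. Truncate at encode time
        # (truncation/max_length are standard tokenizer kwargs, assuming the
        # processor forwards them to its tokenizer) so input_ids and
        # attention_mask stay consistent without slicing tensors afterwards.
        inputs = processor(
            text=prompt,
            truncation=True,
            max_length=max_input_tokens,
            return_tensors="pt",
        ).to(model.device)

        with torch.inference_mode():
            outputs = model.generate(**inputs, max_new_tokens=512)

        # generate() returns prompt + completion for decoder-only models,
        # so drop the prompt tokens before decoding (the "fix: decoder" bug).
        input_length = inputs.input_ids.shape[-1]
        generated_tokens = outputs[0][input_length:]
        return processor.tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

Both encode-time truncation and the commit's post-hoc slice inputs.input_ids[:, :512] keep the first 512 tokens; the encode-time form just avoids the extra hasattr checks, at the cost of assuming the processor accepts tokenizer kwargs.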