Yong Liu committed
Commit: d36359f
Parent(s): 051c5a5
update handler
Files changed: handler.py (+22 -1)
handler.py CHANGED

@@ -1,13 +1,34 @@
 import os
 import json
 import torch
-from transformers import pipeline, AutoTokenizer
+from transformers import pipeline, AutoTokenizer, AutoConfig
 from typing import Dict, List, Any, Optional, Union
 
 class EndpointHandler:
     def __init__(self, path=""):
         # Initialize model and tokenizer
         self.model_path = path if path else os.environ.get("MODEL_PATH", "")
+
+        # Fix RoPE scaling configuration
+        try:
+            config = AutoConfig.from_pretrained(self.model_path)
+
+            # Check if config has rope_scaling attribute and fix the short_factor length
+            if hasattr(config, "rope_scaling") and "short_factor" in config.rope_scaling:
+                short_factor = config.rope_scaling["short_factor"]
+                if len(short_factor) == 48:  # If we have the problematic length
+                    print("Fixing rope_scaling short_factor length from 48 to 64")
+                    # Pad to length 64
+                    padded_short_factor = list(short_factor) + [0.0] * (64 - len(short_factor))
+                    config.rope_scaling["short_factor"] = padded_short_factor
+
+                    # Save the fixed config
+                    config.save_pretrained(self.model_path)
+                    print("Fixed config saved")
+        except Exception as e:
+            print(f"Warning: Could not fix RoPE scaling configuration: {str(e)}")
+
+        # Load tokenizer
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
 
         # Create text generation pipeline
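The added block patches the checkpoint's rope_scaling["short_factor"] on disk before the tokenizer and pipeline load. For readers outside the diff view, the same idea can be exercised as a standalone script; the sketch below is a minimal, hypothetical reconstruction, not part of the commit. MODEL_PATH and EXPECTED_LEN are illustrative placeholders, and zero-padding mirrors the commit's choice rather than a documented remedy.

# Standalone sketch of the commit's config patch (paths and values are placeholders).
from transformers import AutoConfig

MODEL_PATH = "./my-model"  # any local checkpoint directory with a config.json
EXPECTED_LEN = 64          # one factor per rotary dimension pair (head_dim // 2)

config = AutoConfig.from_pretrained(MODEL_PATH)
rope_scaling = getattr(config, "rope_scaling", None)

if rope_scaling and "short_factor" in rope_scaling:
    short_factor = list(rope_scaling["short_factor"])
    if len(short_factor) < EXPECTED_LEN:
        # Zero-pad to the expected length, as the commit does; recomputing
        # proper scaling factors would be the more principled fix.
        short_factor += [0.0] * (EXPECTED_LEN - len(short_factor))
        rope_scaling["short_factor"] = short_factor
        config.save_pretrained(MODEL_PATH)  # writes the patched config.json back

One small difference from the committed code: the hasattr check in the diff still raises (and is swallowed by the except) when rope_scaling exists but is None, since "short_factor" in None is a TypeError; the getattr pattern above sidesteps that. Whether 0.0 is a sensible value for the missing factors is model-specific and not something the commit establishes.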