Yong Liu committed on
Commit
d36359f
·
1 Parent(s): 051c5a5

update handler

Browse files
Files changed (1) hide show
  1. handler.py +22 -1
handler.py CHANGED
@@ -1,13 +1,34 @@
1
  import os
2
  import json
3
  import torch
4
- from transformers import pipeline, AutoTokenizer
5
  from typing import Dict, List, Any, Optional, Union
6
 
7
  class EndpointHandler:
8
  def __init__(self, path=""):
9
  # Initialize model and tokenizer
10
  self.model_path = path if path else os.environ.get("MODEL_PATH", "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
12
 
13
  # Create text generation pipeline
 
1
  import os
2
  import json
3
  import torch
4
+ from transformers import pipeline, AutoTokenizer, AutoConfig
5
  from typing import Dict, List, Any, Optional, Union
6
 
7
  class EndpointHandler:
8
  def __init__(self, path=""):
9
  # Initialize model and tokenizer
10
  self.model_path = path if path else os.environ.get("MODEL_PATH", "")
11
+
12
+ # Fix RoPE scaling configuration
13
+ try:
14
+ config = AutoConfig.from_pretrained(self.model_path)
15
+
16
+ # Check if config has rope_scaling attribute and fix the short_factor length
17
+ if hasattr(config, "rope_scaling") and "short_factor" in config.rope_scaling:
18
+ short_factor = config.rope_scaling["short_factor"]
19
+ if len(short_factor) == 48: # If we have the problematic length
20
+ print("Fixing rope_scaling short_factor length from 48 to 64")
21
+ # Pad to length 64
22
+ padded_short_factor = list(short_factor) + [0.0] * (64 - len(short_factor))
23
+ config.rope_scaling["short_factor"] = padded_short_factor
24
+
25
+ # Save the fixed config
26
+ config.save_pretrained(self.model_path)
27
+ print("Fixed config saved")
28
+ except Exception as e:
29
+ print(f"Warning: Could not fix RoPE scaling configuration: {str(e)}")
30
+
31
+ # Load tokenizer
32
  self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
33
 
34
  # Create text generation pipeline