simonJJJ committed on
Commit
db0c725
1 Parent(s): e0175b5

Update tokenization_qwen.py

Browse files
Files changed (1) hide show
  1. tokenization_qwen.py +7 -1
tokenization_qwen.py CHANGED
@@ -27,6 +27,12 @@ logger = logging.getLogger(__name__)
27
 
28
 
29
  VOCAB_FILES_NAMES = {"vocab_file": "qwen.tiktoken", "ttf": "SimSun.ttf"}
 
 
 
 
 
 
30
 
31
  PAT_STR = r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"""
32
  ENDOFTEXT = "<|endoftext|>"
@@ -515,7 +521,7 @@ class VisImage:
515
  class Visualizer:
516
  def __init__(self, img_rgb, metadata=None, scale=1.0):
517
  self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8)
518
- self.font_path = try_to_load_from_cache("Qwen/Qwen-VL-Chat", "SimSun.ttf")
519
  self.output = VisImage(self.img, scale=scale)
520
  self.cpu_device = torch.device("cpu")
521
 
 
27
 
28
 
29
  VOCAB_FILES_NAMES = {"vocab_file": "qwen.tiktoken", "ttf": "SimSun.ttf"}
30
+ FONT_PATH = try_to_load_from_cache("Qwen/Qwen-VL-Chat", "SimSun.ttf")
31
+ if FONT_PATH is None:
32
+ if not os.path.exists("SimSun.ttf"):
33
+ ttf = requests.get("https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/SimSun.ttf")
34
+ open("SimSun.ttf", "wb").write(ttf.content)
35
+ FONT_PATH = "SimSun.ttf"
36
 
37
  PAT_STR = r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"""
38
  ENDOFTEXT = "<|endoftext|>"
 
521
  class Visualizer:
522
  def __init__(self, img_rgb, metadata=None, scale=1.0):
523
  self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8)
524
+ self.font_path = FONT_PATH
525
  self.output = VisImage(self.img, scale=scale)
526
  self.cpu_device = torch.device("cpu")
527