DLight1551 committed on
Commit
853a0b4
1 Parent(s): 9fba121

update font

Browse files
Files changed (2) hide show
  1. ixc_utils.py +8 -3
  2. modeling_internlm_xcomposer2.py +3 -3
ixc_utils.py CHANGED
@@ -2,11 +2,18 @@ import os
2
  import torch
3
  import numpy as np
4
  import torchvision
 
5
  from PIL import Image, ImageDraw, ImageFont
6
  from torchvision.transforms.functional import InterpolationMode
7
  import torchvision.transforms as transforms
8
  from decord import VideoReader
9
 
 
 
 
 
 
 
10
  def padding_336(b, pad=336):
11
  width, height = b.size
12
  tar = int(np.ceil(height / pad) * pad)
@@ -66,7 +73,7 @@ def Video_transform(img, hd_num=25):
66
 
67
  return img
68
 
69
- def frame2img(imgs, font_path):
70
  new_imgs = []
71
  for img in imgs:
72
  w, h = img.size
@@ -83,8 +90,6 @@ def frame2img(imgs, font_path):
83
  new_w = 0
84
  new_h = 0
85
  pad = 40
86
- print (font_path)
87
- font = ImageFont.truetype(os.path.join(font_path, "SimHei.ttf"), pad)
88
  if w > h:
89
  for im in imgs:
90
  w,h = im.size
 
2
  import torch
3
  import numpy as np
4
  import torchvision
5
+ from urllib.request import urlopen
6
  from PIL import Image, ImageDraw, ImageFont
7
  from torchvision.transforms.functional import InterpolationMode
8
  import torchvision.transforms as transforms
9
  from decord import VideoReader
10
 
11
def get_font():
    """Download the SimHei TrueType font and return it as a 40-point PIL font.

    The font bytes are fetched over HTTPS on every call and loaded from an
    in-memory buffer, so nothing is written to disk.

    Returns:
        The object produced by ``ImageFont.truetype(..., size=40)``.

    Raises:
        urllib.error.URLError: if the download fails (``HTTPError`` is a
            subclass and is raised for non-success HTTP statuses).
        OSError: if Pillow cannot parse the downloaded bytes as a font.
    """
    from io import BytesIO

    # NOTE(review): this is a signed CDN link whose query string carries
    # `Expires=1720275312` (a Unix timestamp); presumably the server rejects
    # it after that time. Consider a stable URL or a font file shipped with
    # the model -- confirm with the repo owners.
    truetype_url = 'https://cdn-lfs-us-1.huggingface.co/repos/19/7a/197a751ef710da1639736f1b5c9ebc26bd38d236aba7f10bcf8b553084c66907/336a838f4a78e150826be608dae69de59d50948c3d2b71760e096ae764154bdc?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27SimHei.ttf%3B+filename%3D%22SimHei.ttf%22%3B&response-content-type=font%2Fttf&Expires=1720275312&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMDI3NTMxMn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzE5LzdhLzE5N2E3NTFlZjcxMGRhMTYzOTczNmYxYjVjOWViYzI2YmQzOGQyMzZhYmE3ZjEwYmNmOGI1NTMwODRjNjY5MDcvMzM2YTgzOGY0YTc4ZTE1MDgyNmJlNjA4ZGFlNjlkZTU5ZDUwOTQ4YzNkMmI3MTc2MGUwOTZhZTc2NDE1NGJkYz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=aZAXME5llGK90xUsPHRuWouco5T92ngs63hhW0gIAWmrUup4Ed5y4lSqB5khoLCLlMHK5lC4QJ58JTFFnmVFgFsKA-XfggYJLXu-TIC6DnvQCLz4L6EvLwCR05jzWOWn3trDorazP%7Enb8nuYKPgwGkpsukvCcqpx5Y0%7EfA4XsUCmcaddmkhFkkS1Wp2QWDnJjFGkuRnm8fQLW%7EG3JCdd7EyBkr2uWG%7E3W7ff62l-f%7EQTvtXIpYTHF3SAeqbB-DYQMUIbQJTuSs0TiQPt3WYvchrbuKN0aqR5OLvDJI2Fl0omJCL-wESyj9L%7EC2sCyY2LCDoE8b6-omgbQal2KHv7cA__&Key-Pair-Id=K24J24Z295AEI9'

    # Read the whole payload inside the context manager so the HTTP
    # connection is closed deterministically instead of being leaked.
    with urlopen(truetype_url) as response:
        font_bytes = response.read()

    # Pillow accepts a file-like object; hand it an in-memory copy so the
    # returned font does not depend on the (now closed) network response.
    return ImageFont.truetype(BytesIO(font_bytes), size=40)
16
+
17
  def padding_336(b, pad=336):
18
  width, height = b.size
19
  tar = int(np.ceil(height / pad) * pad)
 
73
 
74
  return img
75
 
76
+ def frame2img(imgs, font):
77
  new_imgs = []
78
  for img in imgs:
79
  w, h = img.size
 
90
  new_w = 0
91
  new_h = 0
92
  pad = 40
 
 
93
  if w > h:
94
  for im in imgs:
95
  w,h = im.size
modeling_internlm_xcomposer2.py CHANGED
@@ -45,7 +45,7 @@ import torchvision.transforms as transforms
45
  from torchvision.transforms.functional import InterpolationMode
46
 
47
  from .build_mlp import build_vision_projector, build_vision_tower
48
- from .ixc_utils import Image_transform, Video_transform, load_video, frame2img
49
  from .configuration_internlm_xcomposer2 import InternLMXcomposer2Config
50
  from .modeling_internlm2 import (InternLM2_INPUTS_DOCSTRING, InternLM2Model,
51
  InternLM2PreTrainedModel)
@@ -102,7 +102,7 @@ class InternLMXComposer2ForCausalLM(InternLM2PreTrainedModel):
102
  config.hidden_size, config.vocab_size, bias=False)
103
  self.tokenizer = None
104
  self.hd_num = 25
105
- self._path = config._name_or_path
106
 
107
  self.max_length = config.max_length
108
  print(f'Set max length to {self.max_length}')
@@ -164,7 +164,7 @@ class InternLMXComposer2ForCausalLM(InternLM2PreTrainedModel):
164
  image = Image_transform(image, hd_num = hd_num)
165
  elif ext.lower() in video_extensions:
166
  image = load_video(image)
167
- image = frame2img(image, self._path)
168
  image = Video_transform(image, hd_num = hd_num)
169
  else:
170
  print ('Unknow input format', image)
 
45
  from torchvision.transforms.functional import InterpolationMode
46
 
47
  from .build_mlp import build_vision_projector, build_vision_tower
48
+ from .ixc_utils import Image_transform, Video_transform, load_video, frame2img, get_font
49
  from .configuration_internlm_xcomposer2 import InternLMXcomposer2Config
50
  from .modeling_internlm2 import (InternLM2_INPUTS_DOCSTRING, InternLM2Model,
51
  InternLM2PreTrainedModel)
 
102
  config.hidden_size, config.vocab_size, bias=False)
103
  self.tokenizer = None
104
  self.hd_num = 25
105
+ self.font = get_font()
106
 
107
  self.max_length = config.max_length
108
  print(f'Set max length to {self.max_length}')
 
164
  image = Image_transform(image, hd_num = hd_num)
165
  elif ext.lower() in video_extensions:
166
  image = load_video(image)
167
+ image = frame2img(image, self.font)
168
  image = Video_transform(image, hd_num = hd_num)
169
  else:
170
  print ('Unknow input format', image)