DLight1551 committed
Commit 9fba121
1 Parent(s): e914bd1
Files changed (2):
  1. ixc_utils.py +4 -4
  2. modeling_internlm_xcomposer2.py +3 -1
ixc_utils.py CHANGED
@@ -66,7 +66,7 @@ def Video_transform(img, hd_num=25):
 
     return img
 
-def frame2img(imgs):
+def frame2img(imgs, font_path):
     new_imgs = []
     for img in imgs:
         w, h = img.size
@@ -83,7 +83,8 @@ def frame2img(imgs):
     new_w = 0
     new_h = 0
     pad = 40
-    font = ImageFont.truetype(os.path.join(config._name_or_path, "SimHei.ttf"), pad)
+    print (font_path)
+    font = ImageFont.truetype(os.path.join(font_path, "SimHei.ttf"), pad)
     if w > h:
         for im in imgs:
             w,h = im.size
@@ -135,6 +136,5 @@ def load_video(video_path, num_frm=32, start=None, end=None):
     indices = [int(i*step_size) for i in range(num_frm)]
     images = [images[i] for i in indices]
     images = [Image.fromarray(arr) for arr in images]
-    image = frame2img(images)
-    return image
+    return images
 
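Net effect in ixc_utils.py: load_video now stops after frame extraction and returns the list of PIL frames, while frame2img takes the font directory as an explicit font_path argument instead of referencing a config name that this module never defines. A minimal sketch of the resulting call pattern, assuming ixc_utils.py is importable on its own and that model_dir points at a local copy of this repo containing SimHei.ttf; the directory and video path below are placeholders, not values from the commit:

import os
from ixc_utils import load_video, frame2img, Video_transform

model_dir = "/path/to/local/internlm-xcomposer2"   # placeholder; must contain SimHei.ttf
assert os.path.exists(os.path.join(model_dir, "SimHei.ttf"))

frames = load_video("clip.mp4", num_frm=32)        # now returns a list of PIL.Image frames
grid = frame2img(frames, model_dir)                # stitches frames, loading SimHei.ttf from model_dir
video_img = Video_transform(grid, hd_num=25)       # same transform step as before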
modeling_internlm_xcomposer2.py CHANGED
@@ -45,7 +45,7 @@ import torchvision.transforms as transforms
 from torchvision.transforms.functional import InterpolationMode
 
 from .build_mlp import build_vision_projector, build_vision_tower
-from .ixc_utils import Image_transform, Video_transform, load_video
+from .ixc_utils import Image_transform, Video_transform, load_video, frame2img
 from .configuration_internlm_xcomposer2 import InternLMXcomposer2Config
 from .modeling_internlm2 import (InternLM2_INPUTS_DOCSTRING, InternLM2Model,
                                  InternLM2PreTrainedModel)
@@ -102,6 +102,7 @@ class InternLMXComposer2ForCausalLM(InternLM2PreTrainedModel):
             config.hidden_size, config.vocab_size, bias=False)
         self.tokenizer = None
         self.hd_num = 25
+        self._path = config._name_or_path
 
         self.max_length = config.max_length
         print(f'Set max length to {self.max_length}')
@@ -163,6 +164,7 @@ class InternLMXComposer2ForCausalLM(InternLM2PreTrainedModel):
             image = Image_transform(image, hd_num = hd_num)
         elif ext.lower() in video_extensions:
             image = load_video(image)
+            image = frame2img(image, self._path)
             image = Video_transform(image, hd_num = hd_num)
         else:
             print ('Unknow input format', image)
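On the modeling side, __init__ now stores config._name_or_path in self._path, and the video branch calls frame2img(image, self._path) before Video_transform, so the font lookup is threaded through explicitly instead of relying on a global. A small sketch of why _name_or_path can serve as the font directory, assuming the weights (and SimHei.ttf) live in a local folder; "/path/to/model" is a placeholder, and when _name_or_path is a bare Hub repo id rather than a local directory this join would not resolve to a real file:

import os
from transformers import AutoConfig

# AutoConfig records where it was loaded from in config._name_or_path.
config = AutoConfig.from_pretrained("/path/to/model", trust_remote_code=True)

# Mirrors the lookup frame2img performs with the path the model passes in.
font_file = os.path.join(config._name_or_path, "SimHei.ttf")
print(font_file, os.path.exists(font_file))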