Commit 9fba121 committed by DLight1551
Parent(s): e914bd1

update

Files changed:
- ixc_utils.py (+4 -4)
- modeling_internlm_xcomposer2.py (+3 -1)
ixc_utils.py CHANGED

@@ -66,7 +66,7 @@ def Video_transform(img, hd_num=25):

     return img

-def frame2img(imgs):
+def frame2img(imgs, font_path):
     new_imgs = []
     for img in imgs:
         w, h = img.size
@@ -83,7 +83,8 @@ def frame2img(imgs):
     new_w = 0
     new_h = 0
     pad = 40
-
+    print (font_path)
+    font = ImageFont.truetype(os.path.join(font_path, "SimHei.ttf"), pad)
     if w > h:
         for im in imgs:
             w,h = im.size
@@ -135,6 +136,5 @@ def load_video(video_path, num_frm=32, start=None, end=None):
     indices = [int(i*step_size) for i in range(num_frm)]
     images = [images[i] for i in indices]
     images = [Image.fromarray(arr) for arr in images]
-
-    return image
+    return images

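After this change, the video path is split in two: load_video returns the raw list of PIL frames, and frame2img stitches them, now taking an explicit font_path that must point at a directory containing SimHei.ttf. A minimal usage sketch of the new chain, assuming ixc_utils is importable; the model directory and video filename below are hypothetical:

    from ixc_utils import load_video, frame2img, Video_transform

    model_dir = "/path/to/model/dir"              # hypothetical: must contain SimHei.ttf
    frames = load_video("clip.mp4", num_frm=32)   # now returns a list of PIL frames
    grid = frame2img(frames, model_dir)           # font_path is passed in explicitly
    video_img = Video_transform(grid, hd_num=25)  # unchanged downstream call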
modeling_internlm_xcomposer2.py CHANGED

@@ -45,7 +45,7 @@ import torchvision.transforms as transforms
 from torchvision.transforms.functional import InterpolationMode

 from .build_mlp import build_vision_projector, build_vision_tower
-from .ixc_utils import Image_transform, Video_transform, load_video
+from .ixc_utils import Image_transform, Video_transform, load_video, frame2img
 from .configuration_internlm_xcomposer2 import InternLMXcomposer2Config
 from .modeling_internlm2 import (InternLM2_INPUTS_DOCSTRING, InternLM2Model,
                                  InternLM2PreTrainedModel)
@@ -102,6 +102,7 @@ class InternLMXComposer2ForCausalLM(InternLM2PreTrainedModel):
             config.hidden_size, config.vocab_size, bias=False)
         self.tokenizer = None
         self.hd_num = 25
+        self._path = config._name_or_path

         self.max_length = config.max_length
         print(f'Set max length to {self.max_length}')
@@ -163,6 +164,7 @@ class InternLMXComposer2ForCausalLM(InternLM2PreTrainedModel):
                 image = Image_transform(image, hd_num = hd_num)
             elif ext.lower() in video_extensions:
                 image = load_video(image)
+                image = frame2img(image, self._path)
                 image = Video_transform(image, hd_num = hd_num)
             else:
                 print ('Unknow input format', image)
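On the model side, the font directory comes from config._name_or_path, which transformers fills with the path or repo id passed to from_pretrained, so frame2img will look for SimHei.ttf alongside the model files. A sketch of that dependency, using a hypothetical local checkout path:

    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained(
        "/local/checkout/of/the/model",  # hypothetical path; SimHei.ttf must live here
        trust_remote_code=True,          # needed for the custom modeling file
    )
    print(model._path)                   # the directory frame2img will search for the font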