DLight1551
committed on
Commit
•
853a0b4
1
Parent(s):
9fba121
update font
Browse files- ixc_utils.py +8 -3
- modeling_internlm_xcomposer2.py +3 -3
ixc_utils.py
CHANGED
@@ -2,11 +2,18 @@ import os
|
|
2 |
import torch
|
3 |
import numpy as np
|
4 |
import torchvision
|
|
|
5 |
from PIL import Image, ImageDraw, ImageFont
|
6 |
from torchvision.transforms.functional import InterpolationMode
|
7 |
import torchvision.transforms as transforms
|
8 |
from decord import VideoReader
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
def padding_336(b, pad=336):
|
11 |
width, height = b.size
|
12 |
tar = int(np.ceil(height / pad) * pad)
|
@@ -66,7 +73,7 @@ def Video_transform(img, hd_num=25):
|
|
66 |
|
67 |
return img
|
68 |
|
69 |
-
def frame2img(imgs,
|
70 |
new_imgs = []
|
71 |
for img in imgs:
|
72 |
w, h = img.size
|
@@ -83,8 +90,6 @@ def frame2img(imgs, font_path):
|
|
83 |
new_w = 0
|
84 |
new_h = 0
|
85 |
pad = 40
|
86 |
-
print (font_path)
|
87 |
-
font = ImageFont.truetype(os.path.join(font_path, "SimHei.ttf"), pad)
|
88 |
if w > h:
|
89 |
for im in imgs:
|
90 |
w,h = im.size
|
|
|
2 |
import torch
|
3 |
import numpy as np
|
4 |
import torchvision
|
5 |
+
from urllib.request import urlopen
|
6 |
from PIL import Image, ImageDraw, ImageFont
|
7 |
from torchvision.transforms.functional import InterpolationMode
|
8 |
import torchvision.transforms as transforms
|
9 |
from decord import VideoReader
|
10 |
|
11 |
+
def get_font(truetype_url=None, size=40):
    """Download a TrueType font and return it as a PIL font object.

    Args:
        truetype_url: URL to fetch the ``.ttf`` file from. When ``None``
            (the default) the SimHei.ttf link that was hard-coded in the
            original implementation is used.
        size: Font size forwarded to ``ImageFont.truetype``. Defaults to
            40, the value that was previously hard-coded.

    Returns:
        The font object produced by ``ImageFont.truetype``.

    Raises:
        Whatever ``urlopen`` raises when the download fails (for example
        ``urllib.error.URLError``), or whatever ``ImageFont.truetype``
        raises when the payload cannot be loaded as a font.
    """
    # Local import so this block does not depend on a file-level `import io`.
    import io

    if truetype_url is None:
        # NOTE(review): this is a signed CDN link carrying
        # `Expires=1720275312`; presumably it stops working after that
        # timestamp -- confirm and replace with a stable source.
        truetype_url = 'https://cdn-lfs-us-1.huggingface.co/repos/19/7a/197a751ef710da1639736f1b5c9ebc26bd38d236aba7f10bcf8b553084c66907/336a838f4a78e150826be608dae69de59d50948c3d2b71760e096ae764154bdc?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27SimHei.ttf%3B+filename%3D%22SimHei.ttf%22%3B&response-content-type=font%2Fttf&Expires=1720275312&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMDI3NTMxMn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzE5LzdhLzE5N2E3NTFlZjcxMGRhMTYzOTczNmYxYjVjOWViYzI2YmQzOGQyMzZhYmE3ZjEwYmNmOGI1NTMwODRjNjY5MDcvMzM2YTgzOGY0YTc4ZTE1MDgyNmJlNjA4ZGFlNjlkZTU5ZDUwOTQ4YzNkMmI3MTc2MGUwOTZhZTc2NDE1NGJkYz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=aZAXME5llGK90xUsPHRuWouco5T92ngs63hhW0gIAWmrUup4Ed5y4lSqB5khoLCLlMHK5lC4QJ58JTFFnmVFgFsKA-XfggYJLXu-TIC6DnvQCLz4L6EvLwCR05jzWOWn3trDorazP%7Enb8nuYKPgwGkpsukvCcqpx5Y0%7EfA4XsUCmcaddmkhFkkS1Wp2QWDnJjFGkuRnm8fQLW%7EG3JCdd7EyBkr2uWG%7E3W7ff62l-f%7EQTvtXIpYTHF3SAeqbB-DYQMUIbQJTuSs0TiQPt3WYvchrbuKN0aqR5OLvDJI2Fl0omJCL-wESyj9L%7EC2sCyY2LCDoE8b6-omgbQal2KHv7cA__&Key-Pair-Id=K24J24Z295AEI9'

    # Read the whole payload inside a context manager so the HTTP
    # response is always closed, even if the read fails part-way.
    with urlopen(truetype_url) as response:
        font_bytes = response.read()

    # Hand PIL an in-memory file object instead of the live response.
    return ImageFont.truetype(io.BytesIO(font_bytes), size=size)
|
16 |
+
|
17 |
def padding_336(b, pad=336):
|
18 |
width, height = b.size
|
19 |
tar = int(np.ceil(height / pad) * pad)
|
|
|
73 |
|
74 |
return img
|
75 |
|
76 |
+
def frame2img(imgs, font):
|
77 |
new_imgs = []
|
78 |
for img in imgs:
|
79 |
w, h = img.size
|
|
|
90 |
new_w = 0
|
91 |
new_h = 0
|
92 |
pad = 40
|
|
|
|
|
93 |
if w > h:
|
94 |
for im in imgs:
|
95 |
w,h = im.size
|
modeling_internlm_xcomposer2.py
CHANGED
@@ -45,7 +45,7 @@ import torchvision.transforms as transforms
|
|
45 |
from torchvision.transforms.functional import InterpolationMode
|
46 |
|
47 |
from .build_mlp import build_vision_projector, build_vision_tower
|
48 |
-
from .ixc_utils import Image_transform, Video_transform, load_video, frame2img
|
49 |
from .configuration_internlm_xcomposer2 import InternLMXcomposer2Config
|
50 |
from .modeling_internlm2 import (InternLM2_INPUTS_DOCSTRING, InternLM2Model,
|
51 |
InternLM2PreTrainedModel)
|
@@ -102,7 +102,7 @@ class InternLMXComposer2ForCausalLM(InternLM2PreTrainedModel):
|
|
102 |
config.hidden_size, config.vocab_size, bias=False)
|
103 |
self.tokenizer = None
|
104 |
self.hd_num = 25
|
105 |
-
self.
|
106 |
|
107 |
self.max_length = config.max_length
|
108 |
print(f'Set max length to {self.max_length}')
|
@@ -164,7 +164,7 @@ class InternLMXComposer2ForCausalLM(InternLM2PreTrainedModel):
|
|
164 |
image = Image_transform(image, hd_num = hd_num)
|
165 |
elif ext.lower() in video_extensions:
|
166 |
image = load_video(image)
|
167 |
-
image = frame2img(image, self.
|
168 |
image = Video_transform(image, hd_num = hd_num)
|
169 |
else:
|
170 |
print ('Unknow input format', image)
|
|
|
45 |
from torchvision.transforms.functional import InterpolationMode
|
46 |
|
47 |
from .build_mlp import build_vision_projector, build_vision_tower
|
48 |
+
from .ixc_utils import Image_transform, Video_transform, load_video, frame2img, get_font
|
49 |
from .configuration_internlm_xcomposer2 import InternLMXcomposer2Config
|
50 |
from .modeling_internlm2 import (InternLM2_INPUTS_DOCSTRING, InternLM2Model,
|
51 |
InternLM2PreTrainedModel)
|
|
|
102 |
config.hidden_size, config.vocab_size, bias=False)
|
103 |
self.tokenizer = None
|
104 |
self.hd_num = 25
|
105 |
+
self.font = get_font()
|
106 |
|
107 |
self.max_length = config.max_length
|
108 |
print(f'Set max length to {self.max_length}')
|
|
|
164 |
image = Image_transform(image, hd_num = hd_num)
|
165 |
elif ext.lower() in video_extensions:
|
166 |
image = load_video(image)
|
167 |
+
image = frame2img(image, self.font)
|
168 |
image = Video_transform(image, hd_num = hd_num)
|
169 |
else:
|
170 |
print ('Unknow input format', image)
|