committed on
update font
Browse files- ixc_utils.py +8 -3
- modeling_internlm_xcomposer2.py +3 -3
@@ -2,11 +2,18 @@ import os
2 |
import torch
3 |
import numpy as np
4 |
import torchvision
5 |
from PIL import Image, ImageDraw, ImageFont
6 |
from torchvision.transforms.functional import InterpolationMode
7 |
import torchvision.transforms as transforms
8 |
from decord import VideoReader
9 |
10 |
def padding_336(b, pad=336):
11 |
width, height = b.size
12 |
tar = int(np.ceil(height / pad) * pad)
@@ -66,7 +73,7 @@ def Video_transform(img, hd_num=25):
66 |
67 |
return img
68 |
69 |
def frame2img(imgs,
70 |
new_imgs = []
71 |
for img in imgs:
72 |
w, h = img.size
@@ -83,8 +90,6 @@ def frame2img(imgs, font_path):
83 |
new_w = 0
84 |
new_h = 0
85 |
pad = 40
86 |
print (font_path)
87 |
font = ImageFont.truetype(os.path.join(font_path, "SimHei.ttf"), pad)
88 |
if w > h:
89 |
for im in imgs:
90 |
w,h = im.size
2 |
import torch
3 |
import numpy as np
4 |
import torchvision
5 |
from urllib.request import urlopen
6 |
from PIL import Image, ImageDraw, ImageFont
7 |
from torchvision.transforms.functional import InterpolationMode
8 |
import torchvision.transforms as transforms
9 |
from decord import VideoReader
10 |
11 |
def get_font():
    """Download the SimHei TrueType font and return it as a PIL font of size 40.

    The font file is fetched over HTTP on every call, fully read into
    memory, and the HTTP response is closed before the font is built.

    Returns:
        The font object produced by ``ImageFont.truetype(..., size=40)``.

    Raises:
        Whatever ``urlopen`` raises when the download fails, and whatever
        ``ImageFont.truetype`` raises when the downloaded bytes are not a
        usable font.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    from io import BytesIO

    # NOTE(review): this is a pre-signed CDN link carrying an
    # `Expires=1720275312` query parameter — presumably it stops working
    # after that time; confirm and replace with a stable location.
    truetype_url = 'https://cdn-lfs-us-1.huggingface.co/repos/19/7a/197a751ef710da1639736f1b5c9ebc26bd38d236aba7f10bcf8b553084c66907/336a838f4a78e150826be608dae69de59d50948c3d2b71760e096ae764154bdc?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27SimHei.ttf%3B+filename%3D%22SimHei.ttf%22%3B&response-content-type=font%2Fttf&Expires=1720275312&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMDI3NTMxMn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzE5LzdhLzE5N2E3NTFlZjcxMGRhMTYzOTczNmYxYjVjOWViYzI2YmQzOGQyMzZhYmE3ZjEwYmNmOGI1NTMwODRjNjY5MDcvMzM2YTgzOGY0YTc4ZTE1MDgyNmJlNjA4ZGFlNjlkZTU5ZDUwOTQ4YzNkMmI3MTc2MGUwOTZhZTc2NDE1NGJkYz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=aZAXME5llGK90xUsPHRuWouco5T92ngs63hhW0gIAWmrUup4Ed5y4lSqB5khoLCLlMHK5lC4QJ58JTFFnmVFgFsKA-XfggYJLXu-TIC6DnvQCLz4L6EvLwCR05jzWOWn3trDorazP%7Enb8nuYKPgwGkpsukvCcqpx5Y0%7EfA4XsUCmcaddmkhFkkS1Wp2QWDnJjFGkuRnm8fQLW%7EG3JCdd7EyBkr2uWG%7E3W7ff62l-f%7EQTvtXIpYTHF3SAeqbB-DYQMUIbQJTuSs0TiQPt3WYvchrbuKN0aqR5OLvDJI2Fl0omJCL-wESyj9L%7EC2sCyY2LCDoE8b6-omgbQal2KHv7cA__&Key-Pair-Id=K24J24Z295AEI9'

    # Read the whole payload inside a context manager so the HTTP
    # connection is released even if the read or the font parsing fails.
    with urlopen(truetype_url) as ff:
        font_bytes = ff.read()

    # truetype() accepts a file-like object; an in-memory buffer replaces
    # the live HTTP response that was previously handed over and never closed.
    font = ImageFont.truetype(BytesIO(font_bytes), size=40)
    return font
16 |
17 |
def padding_336(b, pad=336):
18 |
width, height = b.size
19 |
tar = int(np.ceil(height / pad) * pad)
73 |
74 |
return img
75 |
76 |
def frame2img(imgs, font):
77 |
new_imgs = []
78 |
for img in imgs:
79 |
w, h = img.size
90 |
new_w = 0
91 |
new_h = 0
92 |
pad = 40
93 |
if w > h:
94 |
for im in imgs:
95 |
w,h = im.size
@@ -45,7 +45,7 @@ import torchvision.transforms as transforms
45 |
from torchvision.transforms.functional import InterpolationMode
46 |
47 |
from .build_mlp import build_vision_projector, build_vision_tower
48 |
from .ixc_utils import Image_transform, Video_transform, load_video, frame2img
49 |
from .configuration_internlm_xcomposer2 import InternLMXcomposer2Config
50 |
from .modeling_internlm2 import (InternLM2_INPUTS_DOCSTRING, InternLM2Model,
51 |
@@ -102,7 +102,7 @@ class InternLMXComposer2ForCausalLM(InternLM2PreTrainedModel):
102 |
config.hidden_size, config.vocab_size, bias=False)
103 |
self.tokenizer = None
104 |
self.hd_num = 25
105 |
106 |
107 |
self.max_length = config.max_length
108 |
print(f'Set max length to {self.max_length}')
@@ -164,7 +164,7 @@ class InternLMXComposer2ForCausalLM(InternLM2PreTrainedModel):
164 |
image = Image_transform(image, hd_num = hd_num)
165 |
elif ext.lower() in video_extensions:
166 |
image = load_video(image)
167 |
image = frame2img(image, self.
168 |
image = Video_transform(image, hd_num = hd_num)
169 |
170 |
print ('Unknow input format', image)
45 |
from torchvision.transforms.functional import InterpolationMode
46 |
47 |
from .build_mlp import build_vision_projector, build_vision_tower
48 |
from .ixc_utils import Image_transform, Video_transform, load_video, frame2img, get_font
49 |
from .configuration_internlm_xcomposer2 import InternLMXcomposer2Config
50 |
from .modeling_internlm2 import (InternLM2_INPUTS_DOCSTRING, InternLM2Model,
51 |
102 |
config.hidden_size, config.vocab_size, bias=False)
103 |
self.tokenizer = None
104 |
self.hd_num = 25
105 |
self.font = get_font()
106 |
107 |
self.max_length = config.max_length
108 |
print(f'Set max length to {self.max_length}')
164 |
image = Image_transform(image, hd_num = hd_num)
165 |
elif ext.lower() in video_extensions:
166 |
image = load_video(image)
167 |
image = frame2img(image, self.font)
168 |
image = Video_transform(image, hd_num = hd_num)
169 |
170 |
print ('Unknow input format', image)