DLight1551 committed on
Commit
e914bd1
1 Parent(s): 18cd0b0
Files changed (2) hide show
  1. ixc_utils.py +1 -0
  2. ixc_utils.py~ +0 -139
ixc_utils.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import torch
2
  import numpy as np
3
  import torchvision
 
1
+ import os
2
  import torch
3
  import numpy as np
4
  import torchvision
ixc_utils.py~ DELETED
@@ -1,139 +0,0 @@
1
- import torch
2
- import numpy as np
3
- import torchvision
4
- from PIL import Image, ImageDraw, ImageFont
5
- from torchvision.transforms.functional import InterpolationMode
6
- import torchvision.transforms as transforms
7
- from decord import VideoReader
8
-
9
- def padding_336(b, pad=336):
10
- width, height = b.size
11
- tar = int(np.ceil(height / pad) * pad)
12
- top_padding = 0 # int((tar - height)/2)
13
- bottom_padding = tar - height - top_padding
14
- left_padding = 0
15
- right_padding = 0
16
- b = transforms.functional.pad(b, [left_padding, top_padding, right_padding, bottom_padding], fill=[255,255,255])
17
-
18
- return b
19
-
20
- def Image_transform(img, hd_num=25):
21
- width, height = img.size
22
- trans = False
23
- if width < height:
24
- img = img.transpose(Image.TRANSPOSE)
25
- trans = True
26
- width, height = img.size
27
- ratio = (width/ height)
28
- scale = 1
29
- while scale*np.ceil(scale/ratio) <= hd_num:
30
- scale += 1
31
- scale -= 1
32
- scale = min(np.ceil(width / 560), scale)
33
- new_w = int(scale * 560)
34
- new_h = int(new_w / ratio)
35
- #print (scale, f'{height}/{new_h}, {width}/{new_w}')
36
-
37
- img = transforms.functional.resize(img, [new_h, new_w],)
38
- img = padding_336(img, 560)
39
- width, height = img.size
40
- if trans:
41
- img = img.transpose(Image.TRANSPOSE)
42
-
43
- return img
44
-
45
-
46
- def Video_transform(img, hd_num=25):
47
- width, height = img.size
48
- trans = False
49
- if width < height:
50
- img = img.transpose(Image.TRANSPOSE)
51
- trans = True
52
- width, height = img.size
53
- ratio = (width/ height)
54
- scale = 1
55
- new_h = int(scale * 560)
56
- new_w = int(new_h * ratio)
57
- #print (new_h, new_w)
58
-
59
- img = transforms.functional.resize(img, [new_h, new_w],)
60
- img = img.transpose(Image.TRANSPOSE)
61
- img = padding_336(img, 560)
62
- width, height = img.size
63
- if not trans:
64
- img = img.transpose(Image.TRANSPOSE)
65
-
66
- return img
67
-
68
- def frame2img(imgs):
69
- new_imgs = []
70
- for img in imgs:
71
- w, h = img.size
72
- scale = w/h
73
- if w > h:
74
- new_w = 560 * 2
75
- new_h = int(560 * 2 / scale)
76
- else:
77
- new_w = int(560 * 2 * scale)
78
- new_h = 560 * 2
79
- img = transforms.functional.resize(img, [new_h, new_w],)
80
- new_imgs.append(img)
81
- imgs = new_imgs
82
- new_w = 0
83
- new_h = 0
84
- pad = 40
85
- font = ImageFont.truetype(os.path.join(config._name_or_path, "SimHei.ttf"), pad)
86
- if w > h:
87
- for im in imgs:
88
- w,h = im.size
89
- new_w = max(new_w, w)
90
- new_h += h + 10 + pad
91
- new_img = Image.new('RGB', (new_w, new_h), 'white')
92
- draw = ImageDraw.Draw(new_img)
93
- curr_h = 0
94
- for idx, im in enumerate(imgs):
95
- w,h = im.size
96
- new_img.paste(im, (0, pad + curr_h))
97
- draw.text((0, curr_h ), f'<IMAGE {idx}>', font=font, fill='black')
98
- if idx + 1 < len(imgs):
99
- draw.line([(0, pad +curr_h + h +5), (new_w, pad +curr_h + h +5)], fill = 'black', width=2)
100
- curr_h += h + 10 + pad
101
- #print (new_w, new_h)
102
- else:
103
- for im in imgs:
104
- w,h = im.size
105
- new_w += w + 10
106
- new_h = max(new_h, h)
107
- new_h += pad
108
- new_img = Image.new('RGB', (new_w, new_h), 'white')
109
- draw = ImageDraw.Draw(new_img)
110
- curr_w = 0
111
- for idx, im in enumerate(imgs):
112
- w,h = im.size
113
- new_img.paste(im, (curr_w, pad))
114
- draw.text((curr_w, 0), f'<IMAGE {idx}>', font=font, fill='black')
115
- if idx + 1 < len(imgs):
116
- draw.line([(curr_w + w + 5, 0), (curr_w + w + 5, new_h)], fill = 'black', width=2)
117
- curr_w += w + 10
118
- return new_img
119
-
120
- def load_video(video_path, num_frm=32, start=None, end=None):
121
- vid = VideoReader(video_path, num_threads=1)
122
- fps = vid.get_avg_fps()
123
- t_stride = int(round(float(fps) / int(1)))
124
- start_idx = 0 if start is None else start
125
- end_idx = len(vid) if end is None else end
126
- all_pos = list(range(start_idx, end_idx, t_stride))
127
- try:
128
- images = [vid[i].numpy() for i in all_pos]
129
- except:
130
- images = [vid[i].asnumpy() for i in all_pos]
131
- if len(images) > num_frm:
132
- num_frm = min(num_frm, len(images))
133
- step_size = len(images) / (num_frm + 1)
134
- indices = [int(i*step_size) for i in range(num_frm)]
135
- images = [images[i] for i in indices]
136
- images = [Image.fromarray(arr) for arr in images]
137
- image = frame2img(images)
138
- return image
139
-