yue-here committed on
Commit
e470706
1 Parent(s): 29aeaf6

add app and reqs

Browse files
Files changed (2) hide show
  1. app.py +39 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from PIL import Image, ImageOps
3
+ from transformers import VisionEncoderDecoderModel, GPT2Tokenizer, AutoFeatureExtractor
4
+
5
# Hub repository that provides both the image preprocessing config and the
# encoder-decoder model weights.
_MODEL_REPO = "yuewu/toc_titler"

# Tokenizer used to decode generated ids: the stock GPT-2 tokenizer with an
# explicit padding token supplied.
text_processor = GPT2Tokenizer.from_pretrained("gpt2", pad_token="<|pad|>")
# text_processor = AutoTokenizer.from_pretrained("yuewu/toc_titler")

# Turns PIL images into the pixel tensors the model consumes.
image_processor = AutoFeatureExtractor.from_pretrained(_MODEL_REPO)

# Image-to-text model, loaded once at import time and shared by every request.
model = VisionEncoderDecoderModel.from_pretrained(_MODEL_REPO)
9
+
10
def array_to_square_image(image):
    """Convert an image array to a square PIL image padded with white.

    The shorter dimension is padded on both sides so the original content
    stays centered. When the size difference is odd, the extra pixel goes on
    the bottom or right edge, so the result is always exactly square and the
    content is never stretched.

    Args:
        image: Array accepted by ``PIL.Image.fromarray``.

    Returns:
        A ``PIL.Image.Image`` whose width equals its height.
    """
    # Numpy array to PIL image
    image = Image.fromarray(image)

    width, height = image.size
    if width == height:
        return image

    # Split the missing pixels between the two sides of the short dimension;
    # ``far`` carries the odd pixel so near + far == delta exactly.
    delta = abs(width - height)
    near = delta // 2
    far = delta - near

    # Border order is (left, top, right, bottom).
    if width > height:
        border = (0, near, 0, far)
    else:
        border = (near, 0, far, 0)

    # NOTE(review): the RGB fill tuple assumes a 3-channel image; confirm
    # behavior if other modes (e.g. grayscale) can reach this function.
    return ImageOps.expand(image, border, fill=(255, 255, 255))
29
+
30
def greet(image):
    """Run an image through the model and return the decoded text.

    Args:
        image: Image array as delivered by the Gradio image input, or
            ``None`` (e.g. when the form is submitted without an image).

    Returns:
        The decoded text of the first generated sequence, or an empty string
        when no image was supplied.
    """
    if image is None:
        # Nothing to process; returning early avoids an opaque failure inside
        # Image.fromarray(None).
        return ""

    image = array_to_square_image(image)
    pixel_values = image_processor(image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    generated_text = text_processor.batch_decode(
        generated_ids, skip_special_tokens=True
    )

    # A single image was submitted, so only the first decoded string is used.
    return generated_text[0]
37
+
38
# Expose greet() through a minimal image-in / text-out Gradio interface and
# start serving it.
demo = gr.Interface(
    fn=greet,
    inputs="image",
    outputs="text",
)
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
Pillow >= 9.2.0
transformers >= 4.0.0
# app.py requests PyTorch tensors (return_tensors="pt") and loads a PyTorch
# model class, so the PyTorch backend must be installed explicitly.
torch