kwyong committed on
Commit
f1b497b
·
1 Parent(s): 98fad19

Add app.py

Browse files
Files changed (2) hide show
  1. app.py +40 -0
  2. page_10.jpg +0 -0
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import hf_hub_download
2
+ import re
3
+ from PIL import Image
4
+
5
+ import gradio as gr
6
+
7
+ from transformers import NougatProcessor, VisionEncoderDecoderModel
8
+ from datasets import load_dataset
9
+ import torch
10
+
11
# Checkpoint identifier shared by the processor and the model so both are
# loaded from the same pretrained release.
model_checkpoint = "facebook/nougat-base"
processor = NougatProcessor.from_pretrained(model_checkpoint)
model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint)

# Use GPU if possible.
# NOTE: the availability check lives in the ``torch.cuda`` submodule;
# ``torch.cuda_is_available`` does not exist and raises AttributeError.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
18
+
19
+ # prepare PDF image for the model
20
def predict(img):
    """Generate text for a page image with the loaded Nougat model.

    Args:
        img: The image to transcribe, in whatever form the module-level
            ``processor`` accepts. ``None`` (nothing submitted) is tolerated.

    Returns:
        The decoded and post-processed sequence as a string, or an empty
        string when ``img`` is ``None``.
    """
    # Nothing was submitted: return early instead of failing in the processor.
    if img is None:
        return ""

    # Convert the image into the tensor layout the encoder expects.
    pixel_values = processor(img, return_tensors="pt").pixel_values

    outputs = model.generate(
        pixel_values.to(device),  # inputs must live on the same device as the model
        min_length=1,
        max_new_tokens=30,  # caps the generation at 30 newly produced tokens
        # Forbid the tokenizer's unknown token from being generated.
        bad_words_ids=[[processor.tokenizer.unk_token_id]],
    )

    # A single image was passed in, so take the first decoded sequence.
    sequence = processor.batch_decode(outputs, skip_special_tokens=True)[0]
    sequence = processor.post_process_generation(sequence, fix_markdown=False)
    return sequence
33
+
34
# Input component: a single image widget whose value is passed to ``predict``.
image = gr.Image()
# Output components: one plain-text field, given by its shortcut name.
text = ["text"]
# Sample input offered in the UI; the file is added alongside app.py.
examples = ['page_10.jpg']

# NOTE: the keyword is ``outputs``; the misspelling ``outpus`` makes
# ``gr.Interface`` raise TypeError before the app can start.
intf = gr.Interface(fn=predict, inputs=image, outputs=text, examples=examples)
intf.launch(inline=False)
40
+
page_10.jpg ADDED