svjack commited on
Commit
e5bfa19
1 Parent(s): eb61cfe

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +68 -0
README.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ library_name: transformers
5
+ tags:
6
+ - donut
7
+ - donut-python
8
+ ---
9
+
10
+ ### Installation
11
+ ```bash
12
+ pip install torch
13
+ pip install transformers==4.11.3
14
+ pip install opencv-python==4.6.0.66
15
+ pip install donut-python
16
+ ```
17
+
18
+ ### Usage
19
+ ```python
20
+ import sys
21
+ import os
22
+ import pandas as pd
23
+ import numpy as np
24
+ import shutil
25
+
26
+ from tqdm import tqdm
27
+ import re
28
+
29
+ from donut import DonutModel
30
+ import torch
31
+ from PIL import Image
32
+
33
+ en_model_path = "question_generator_by_en_on_pic"
34
+
35
+ task_prompt = "<s_docvqa><s_question>{user_input}</s_question><s_answer>"
36
+ en_pretrained_model = DonutModel.from_pretrained(en_model_path)
37
+
38
+ if torch.cuda.is_available():
39
+ en_pretrained_model.half()
40
+ device = torch.device("cuda")
41
+ en_pretrained_model.to(device)
42
+
43
+ en_pretrained_model.eval()
44
+ print("model loaded!")
45
+
46
+ def demo_process_vqa(input_img, question):
47
+ #input_img = Image.fromarray(input_img)
48
+ global en_pretrained_model, task_prompt
49
+ user_prompt = task_prompt.replace("{user_input}", question)
50
+ output = en_pretrained_model.inference(input_img, prompt=user_prompt)["predictions"][0]
51
+ req = {
52
+ "question": output["answer"],
53
+ "answer": output["question"]
54
+ }
55
+ return req
56
+
57
+
58
+ img_path = "en_img.png"
59
+ demo_process_vqa(Image.open(img_path), "605-7227")
60
+
61
+ '''
62
+ {
63
+ "question": "What is the Phone #?",
64
+ "answer": "605-7227"
65
+ }
66
+ '''
67
+
68
+ ```