Spaces:
Runtime error
Runtime error
ivelin
committed on
Commit
•
ba6d9e2
1
Parent(s):
e0dd23e
fix:example formatting
Browse files
Signed-off-by: ivelin <ivelin.eth@gmail.com>
app.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import re
|
2 |
import gradio as gr
|
|
|
3 |
|
4 |
import torch
|
5 |
from transformers import DonutProcessor, VisionEncoderDecoderModel
|
@@ -13,7 +14,10 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
13 |
model.to(device)
|
14 |
|
15 |
|
16 |
-
def process_document(image, prompt):
|
|
|
|
|
|
|
17 |
# prepare encoder inputs
|
18 |
pixel_values = processor(image, return_tensors="pt").pixel_values
|
19 |
|
@@ -68,20 +72,13 @@ def process_document(image, prompt):
|
|
68 |
|
69 |
description = "Gradio Demo for Donut RefExp task, an instance of `VisionEncoderDecoderModel` fine-tuned on UIBert RefExp Dataset (UI Referring Expression). To use it, simply upload your image and type a question and click 'submit', or click one of the examples to load them. Read more at the links below."
|
70 |
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.15664' target='_blank'>Donut: OCR-free Document Understanding Transformer</a> | <a href='https://github.com/clovaai/donut' target='_blank'>Github Repo</a></p>"
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
["example_1.jpg", "select the setting icon from top right corner"],
|
82 |
-
["example_2.jpg", "enter the text field next to the name"]
|
83 |
-
],
|
84 |
-
cache_examples=False)
|
85 |
-
|
86 |
-
demo.launch()
|
87 |
-
#
|
|
|
1 |
import re
|
2 |
import gradio as gr
|
3 |
+
from PIL import Image
|
4 |
|
5 |
import torch
|
6 |
from transformers import DonutProcessor, VisionEncoderDecoderModel
|
|
|
14 |
model.to(device)
|
15 |
|
16 |
|
17 |
+
def process_document(image: Image, prompt: str):
|
18 |
+
# trim prompt to 80 characters and normalize to lowercase
|
19 |
+
prompt = prompt[:80].lower()
|
20 |
+
|
21 |
# prepare encoder inputs
|
22 |
pixel_values = processor(image, return_tensors="pt").pixel_values
|
23 |
|
|
|
72 |
|
73 |
description = "Gradio Demo for Donut RefExp task, an instance of `VisionEncoderDecoderModel` fine-tuned on UIBert RefExp Dataset (UI Referring Expression). To use it, simply upload your image and type a question and click 'submit', or click one of the examples to load them. Read more at the links below."
|
74 |
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2111.15664' target='_blank'>Donut: OCR-free Document Understanding Transformer</a> | <a href='https://github.com/clovaai/donut' target='_blank'>Github Repo</a></p>"
|
75 |
+
# Example (image file, prompt) pairs offered in the demo UI.
# This must be a plain nested list: a trailing comma after the closing
# bracket would turn `examples` into a one-element tuple wrapping the list.
examples = [
    ["example_1.jpg", "select the setting icon from top right corner"],
    ["example_2.jpg", "enter the text field next to the name"],
]

# Build the Gradio interface: an image plus a text prompt in, an image plus
# text out.
# NOTE(review): `visual_grounding` is not defined in the visible part of this
# file (the visible handler is `process_document`) -- confirm the name.
# NOTE(review): `outputs` reuses `gr.inputs.Image`; confirm an input component
# is intended here rather than an output component.
demo = gr.Interface(
    fn=visual_grounding,
    inputs=[gr.inputs.Image(type='pil'), "textbox"],
    outputs=[gr.inputs.Image(type='pil'), "textbox"],
    title=title,
    description=description,
    article=article,
    examples=examples,
    allow_flagging=False,
    allow_screenshot=False,
)

# NOTE(review): the previous revision passed `cache_examples` to
# `gr.Interface(...)` and called `launch()` with no arguments -- confirm the
# installed gradio version accepts `cache_examples` on `launch()`.
demo.launch(cache_examples=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|