Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -3,9 +3,11 @@ import re
|
|
3 |
from PIL import Image
|
4 |
import requests
|
5 |
from nougat.dataset.rasterize import rasterize_paper
|
6 |
-
|
7 |
from transformers import NougatProcessor, VisionEncoderDecoderModel
|
8 |
import torch
|
|
|
|
|
|
|
9 |
|
10 |
processor = NougatProcessor.from_pretrained("facebook/nougat-small")
|
11 |
model = VisionEncoderDecoderModel.from_pretrained("facebook/nougat-small")
|
@@ -48,7 +50,7 @@ def predict(image):
|
|
48 |
|
49 |
|
50 |
|
51 |
-
def inference(pdf_file, pdf_link):
|
52 |
if pdf_file is None:
|
53 |
if pdf_link == '':
|
54 |
print("No file is uploaded and No link is provided")
|
@@ -67,13 +69,14 @@ def inference(pdf_file, pdf_link):
|
|
67 |
|
68 |
|
69 |
content = sequence.replace(r'\(', '$').replace(r'\)', '$').replace(r'\[', '$$').replace(r'\]', '$$')
|
70 |
-
return content
|
71 |
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
|
|
|
|
77 |
|
78 |
css = """
|
79 |
#mkd {
|
@@ -98,20 +101,24 @@ with gr.Blocks(css=css) as demo:
|
|
98 |
with gr.Row(equal_height=True):
|
99 |
pdf_file = gr.File(label='PDF ๐', file_count='single', scale=1)
|
100 |
pdf_link = gr.Textbox(placeholder='Enter an arxiv link here', label='Link to Paper๐', scale=1)
|
101 |
-
|
|
|
102 |
with gr.Row():
|
103 |
btn = gr.Button('Run Nougat 🍫')
|
104 |
-
|
|
|
105 |
|
106 |
-
output_headline = gr.Markdown("PDF converted to markup language through Nougat-OCR๐")
|
107 |
parsed_output = gr.Markdown(elem_id='mkd', value='OCR Output ๐')
|
|
|
108 |
|
109 |
-
btn.click(inference, [pdf_file, pdf_link], parsed_output )
|
110 |
clr.click(lambda : (gr.update(value=None),
|
111 |
gr.update(value=None),
|
|
|
112 |
gr.update(value=None)),
|
113 |
[],
|
114 |
-
[pdf_file, pdf_link, parsed_output]
|
115 |
)
|
116 |
gr.Examples(
|
117 |
[["nougat.pdf", ""], [None, "https://arxiv.org/pdf/2308.08316.pdf"]],
|
|
|
3 |
from PIL import Image
|
4 |
import requests
|
5 |
from nougat.dataset.rasterize import rasterize_paper
|
|
|
6 |
from transformers import NougatProcessor, VisionEncoderDecoderModel
|
7 |
import torch
|
8 |
+
import gradio as gr
|
9 |
+
import uuid
|
10 |
+
import os
|
11 |
|
12 |
processor = NougatProcessor.from_pretrained("facebook/nougat-small")
|
13 |
model = VisionEncoderDecoderModel.from_pretrained("facebook/nougat-small")
|
|
|
50 |
|
51 |
|
52 |
|
53 |
+
def inference(pdf_file, pdf_link, file_btn):
|
54 |
if pdf_file is None:
|
55 |
if pdf_link == '':
|
56 |
print("No file is uploaded and No link is provided")
|
|
|
69 |
|
70 |
|
71 |
content = sequence.replace(r'\(', '$').replace(r'\)', '$').replace(r'\[', '$$').replace(r'\]', '$$')
|
|
|
72 |
|
73 |
+
if file_btn:
|
74 |
+
with open("output.txt","w+") as f:
|
75 |
+
f.write(content)
|
76 |
+
f.close()
|
77 |
+
|
78 |
+
return content, "output.txt"
|
79 |
+
|
80 |
|
81 |
css = """
|
82 |
#mkd {
|
|
|
101 |
with gr.Row(equal_height=True):
|
102 |
pdf_file = gr.File(label='PDF ๐', file_count='single', scale=1)
|
103 |
pdf_link = gr.Textbox(placeholder='Enter an arxiv link here', label='Link to Paper๐', scale=1)
|
104 |
+
with gr.Row():
|
105 |
+
file_btn = gr.Checkbox(label='Download output as file ๐')
|
106 |
with gr.Row():
|
107 |
btn = gr.Button('Run Nougat 🍫')
|
108 |
+
with gr.Row():
|
109 |
+
clr = gr.Button('Clear Inputs & Outputs 🧼')
|
110 |
|
111 |
+
output_headline = gr.Markdown("## PDF converted to markup language through Nougat-OCR๐")
|
112 |
parsed_output = gr.Markdown(elem_id='mkd', value='OCR Output ๐')
|
113 |
+
output_file = gr.File(file_types = ["txt"], label="Output File")
|
114 |
|
115 |
+
# `inference` returns (markdown_text, file_path), so both receiving components
# must be passed together as the single `outputs` list; a fourth positional
# argument would be taken as `api_name`, not as an extra output.
btn.click(inference, [pdf_file, pdf_link, file_btn], [parsed_output, output_file])
|
116 |
# Reset every input and output component. The callback must return exactly one
# update per component listed in `outputs` (five here), in the same order.
clr.click(
    lambda: (
        gr.update(value=None),  # pdf_file
        gr.update(value=None),  # pdf_link
        gr.update(value=None),  # file_btn
        gr.update(value=None),  # output_file
        gr.update(value=None),  # parsed_output
    ),
    [],
    [pdf_file, pdf_link, file_btn, output_file, parsed_output],
)
|
123 |
gr.Examples(
|
124 |
[["nougat.pdf", ""], [None, "https://arxiv.org/pdf/2308.08316.pdf"]],
|