merve HF staff committed on
Commit
b2c2fa2
•
1 Parent(s): 52ca21c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -13
app.py CHANGED
@@ -3,9 +3,11 @@ import re
3
  from PIL import Image
4
  import requests
5
  from nougat.dataset.rasterize import rasterize_paper
6
-
7
  from transformers import NougatProcessor, VisionEncoderDecoderModel
8
  import torch
 
 
 
9
 
10
  processor = NougatProcessor.from_pretrained("facebook/nougat-small")
11
  model = VisionEncoderDecoderModel.from_pretrained("facebook/nougat-small")
@@ -48,7 +50,7 @@ def predict(image):
48
 
49
 
50
 
51
- def inference(pdf_file, pdf_link):
52
  if pdf_file is None:
53
  if pdf_link == '':
54
  print("No file is uploaded and No link is provided")
@@ -67,13 +69,14 @@ def inference(pdf_file, pdf_link):
67
 
68
 
69
  content = sequence.replace(r'\(', '$').replace(r'\)', '$').replace(r'\[', '$$').replace(r'\]', '$$')
70
- return content
71
 
72
- import gradio as gr
73
- import uuid
74
- import os
75
- import requests
76
- import re
 
 
77
 
78
  css = """
79
  #mkd {
@@ -98,20 +101,24 @@ with gr.Blocks(css=css) as demo:
98
  with gr.Row(equal_height=True):
99
  pdf_file = gr.File(label='PDF 📑', file_count='single', scale=1)
100
  pdf_link = gr.Textbox(placeholder='Enter an arxiv link here', label='Link to Paper🔗', scale=1)
101
-
 
102
  with gr.Row():
103
  btn = gr.Button('Run Nougat 🍫')
104
- clr = gr.Button('Clear 🧼')
 
105
 
106
- output_headline = gr.Markdown("PDF converted to markup language through Nougat-OCR👇")
107
  parsed_output = gr.Markdown(elem_id='mkd', value='OCR Output 📝')
 
108
 
109
- btn.click(inference, [pdf_file, pdf_link], parsed_output )
110
  clr.click(lambda : (gr.update(value=None),
111
  gr.update(value=None),
 
112
  gr.update(value=None)),
113
  [],
114
- [pdf_file, pdf_link, parsed_output]
115
  )
116
  gr.Examples(
117
  [["nougat.pdf", ""], [None, "https://arxiv.org/pdf/2308.08316.pdf"]],
 
3
  from PIL import Image
4
  import requests
5
  from nougat.dataset.rasterize import rasterize_paper
 
6
  from transformers import NougatProcessor, VisionEncoderDecoderModel
7
  import torch
8
+ import gradio as gr
9
+ import uuid
10
+ import os
11
 
12
  processor = NougatProcessor.from_pretrained("facebook/nougat-small")
13
  model = VisionEncoderDecoderModel.from_pretrained("facebook/nougat-small")
 
50
 
51
 
52
 
53
+ def inference(pdf_file, pdf_link, file_btn):
54
  if pdf_file is None:
55
  if pdf_link == '':
56
  print("No file is uploaded and No link is provided")
 
69
 
70
 
71
  content = sequence.replace(r'\(', '$').replace(r'\)', '$').replace(r'\[', '$$').replace(r'\]', '$$')
 
72
 
73
+ if file_btn:
74
+ with open("output.txt","w+") as f:
75
+ f.write(content)
76
+ f.close()
77
+
78
+ return content, "output.txt"
79
+
80
 
81
  css = """
82
  #mkd {
 
101
  with gr.Row(equal_height=True):
102
  pdf_file = gr.File(label='PDF 📑', file_count='single', scale=1)
103
  pdf_link = gr.Textbox(placeholder='Enter an arxiv link here', label='Link to Paper🔗', scale=1)
104
+ with gr.Row():
105
+ file_btn = gr.Checkbox(label='Download output as file 📑')
106
  with gr.Row():
107
  btn = gr.Button('Run Nougat 🍫')
108
+ with gr.Row():
109
+ clr = gr.Button('Clear Inputs & Outputs 🧼')
110
 
111
+ output_headline = gr.Markdown("## PDF converted to markup language through Nougat-OCR👇")
112
  parsed_output = gr.Markdown(elem_id='mkd', value='OCR Output 📝')
113
+ output_file = gr.File(file_types = ["txt"], label="Output File")
114
 
115
+ btn.click(inference, [pdf_file, pdf_link, file_btn], parsed_output, output_file)
116
  clr.click(lambda : (gr.update(value=None),
117
  gr.update(value=None),
118
+ gr.update(value=None),
119
  gr.update(value=None)),
120
  [],
121
+ [pdf_file, pdf_link, file_btn, output_file, parsed_output]
122
  )
123
  gr.Examples(
124
  [["nougat.pdf", ""], [None, "https://arxiv.org/pdf/2308.08316.pdf"]],