ysharma HF staff committed on
Commit
3c0821a
1 Parent(s): b7fe45a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -25
app.py CHANGED
@@ -1,37 +1,48 @@
1
  import gradio as gr
2
  import subprocess
3
 
4
def fun(filepath):
    """Run the ``nougat`` CLI on a PDF and return what it wrote to stdout.

    Args:
        filepath: Path of the PDF to convert, or an uploaded-file object
            that carries the path in its ``name`` attribute.

    Returns:
        The captured stdout text. An empty string is returned when the
        ``nougat`` executable cannot be found, so the caller still gets a
        string to display.
    """
    import os

    print(f"filepath is - {filepath}")

    # Plain paths are used as-is; anything else is treated as a file wrapper
    # whose ``name`` attribute holds the on-disk path.
    if isinstance(filepath, (str, os.PathLike)):
        pdf_path = filepath
    else:
        pdf_path = getattr(filepath, "name", filepath)

    # Argument list instead of a shell string: a path containing spaces or
    # shell metacharacters is handed to nougat as one literal argument.
    cli_command = ["nougat", str(pdf_path)]

    try:
        completed_process = subprocess.run(
            cli_command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
    except FileNotFoundError as exc:
        # Without a shell a missing executable raises instead of producing a
        # "not found" message on stderr; keep the old no-crash behaviour.
        output, errors = "", str(exc)
    else:
        output = completed_process.stdout
        errors = completed_process.stderr

    # Only the sizes are logged; the texts themselves can be very long.
    print("Output:")
    print(len(output))
    print("Errors:")
    print(len(errors))
    return output
 
 
 
 
 
23
 
24
# Page layout: an input row (file upload OR arxiv link), then a row holding
# the run button and the text box that shows the parsed result.
with gr.Blocks() as demo:
    with gr.Row():
        pdf_file = gr.File(label='Upload a PDF', scale=1)
        mkd = gr.HTML(' <i>OR</i> ', scale=1)
        pdf_link = gr.Textbox(
            placeholder='Enter an arxiv link here',
            label='Provide a link',
            scale=1,
        )

    with gr.Row():
        btn = gr.Button()
        parsed_output = gr.Textbox(lines=5)

    # Clicking the button sends the uploaded file through `fun` and shows
    # the returned text in the output box.
    btn.click(fn=fun, inputs=pdf_file, outputs=parsed_output)

demo.launch(debug=True)
 
 
1
  import gradio as gr
2
  import subprocess
3
 
4
+
5
def nougat_ocr(file_name, out_dir='/output', checkpoint='/nougat'):
    """Run the ``nougat`` CLI on one PDF so it writes its result to ``out_dir``.

    Args:
        file_name: Path of the PDF to process.
        out_dir: Directory passed to ``--out``. Defaults to the previously
            hard-coded ``/output``.
        checkpoint: Path passed to ``--checkpoint``. Defaults to the
            previously hard-coded ``/nougat``.

    Returns:
        None. The function is called for its side effect of running the CLI.

    Raises:
        FileNotFoundError: If the ``nougat`` executable is not on ``PATH``
            (raised by ``subprocess.run``).
    """
    # NOTE(review): the literal 'pdf' entry ahead of the real path is kept
    # exactly as before; it looks like a leftover placeholder -- confirm
    # against the nougat CLI usage before removing it.
    cli_command = [
        'nougat',
        '--out', str(out_dir),
        'pdf', f'{file_name}',
        '--checkpoint', str(checkpoint),
    ]

    # Run the command; on success the output folder should hold a .mmd file.
    completed = subprocess.run(
        cli_command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )

    # Previously the captured streams were discarded, so a failed conversion
    # was invisible. Report it, but do not raise: callers rely on this being
    # best-effort.
    if completed.returncode != 0:
        print(f"nougat exited with code {completed.returncode}")
        print(completed.stderr)
    return
17
+
18
+
19
def predict(pdf_file):
    """Convert an uploaded PDF with nougat and return the generated markdown.

    Args:
        pdf_file: Uploaded-file object whose ``name`` attribute is the path
            of the temporary PDF, or ``None`` when nothing was uploaded.

    Returns:
        The text of the ``.mmd`` file produced for the PDF, or a short
        prompt when no file was provided.

    Raises:
        FileNotFoundError: If no ``.mmd`` file for the PDF exists in any of
            the known output folders after the conversion ran.
    """
    from pathlib import Path

    # No upload: return a message instead of failing on ``None.name``.
    if pdf_file is None:
        return "Please upload a PDF file first."

    print(f"temporary file - {pdf_file.name}")
    # Strip only the final extension so names that contain dots
    # (e.g. "2308.13418.pdf") keep their full stem.
    pdf_name = Path(pdf_file.name).stem
    print(f"pdf file name - {pdf_name}")

    #! Get prediction for a PDF using nougat
    nougat_ocr(pdf_file.name)

    # The conversion is pointed at '/output' while this function used to read
    # from '/content/output'; check both so the result is found either way.
    candidates = [
        Path(folder) / f'{pdf_name}.mmd'
        for folder in ('/content/output', '/output')
    ]
    for mmd_path in candidates:
        if mmd_path.is_file():
            break
    else:
        searched = ', '.join(str(path) for path in candidates)
        raise FileNotFoundError(f"nougat output not found; looked in: {searched}")

    # Open the multimarkdown (.mmd) file for reading; be explicit about the
    # encoding so the result does not depend on the container's locale.
    with open(mmd_path, 'r', encoding='utf-8') as file:
        content = file.read()

    return content
32
 
 
33
 
34
# Page layout: an input row (file upload OR arxiv link), followed by the run
# button and a markdown area that renders the parsed document.
with gr.Blocks() as demo:

    with gr.Row():
        pdf_file = gr.File(label='Upload a PDF', scale=1)
        mkd = gr.Markdown('<h2><center><i>OR</i></center></h2>', scale=1)
        pdf_link = gr.Textbox(
            placeholder='Enter an arxiv link here',
            label='Provide a link',
            scale=1,
        )

    btn = gr.Button()
    parsed_output = gr.Markdown()

    # Clicking the button sends the uploaded file through `predict` and
    # renders the returned markdown.
    btn.click(fn=predict, inputs=pdf_file, outputs=parsed_output)

# Turn on request queuing before starting the server.
demo.queue()

demo.launch(debug=True)
48
+