nougat

Sleeping

File size: 4,714 Bytes

import gradio as gr
import subprocess
import uuid
import os
import requests
import re

def get_pdf(pdf_link):
    """Download the PDF at *pdf_link* into the local ``input`` directory.

    Args:
        pdf_link: URL of the PDF to download.

    Returns:
        Path of the saved file, of the form
        ``input/downloaded_paper_<32 hex chars>.pdf``.

    Raises:
        requests.RequestException: if the request itself fails or times out.
        RuntimeError: if the server answers with a status other than 200.
            Raising here avoids handing callers a path to a file that was
            never written.
    """
    # Generate a unique filename so separate downloads never overwrite each other
    unique_filename = f"input/downloaded_paper_{uuid.uuid4().hex}.pdf"

    # A timeout keeps an unresponsive server from blocking the request forever
    response = requests.get(pdf_link, timeout=60)

    if response.status_code != 200:
        print("Failed to download the PDF.")
        raise RuntimeError(
            f"Failed to download the PDF from {pdf_link!r} "
            f"(HTTP status {response.status_code})."
        )

    # Make sure the target directory exists before writing into it
    os.makedirs(os.path.dirname(unique_filename), exist_ok=True)

    # Save the PDF content to a local file
    with open(unique_filename, 'wb') as pdf_file:
        pdf_file.write(response.content)
    print("PDF downloaded successfully.")
    return unique_filename


def nougat_ocr(file_name):
    """Run the ``nougat`` command-line tool on a single PDF.

    The command is invoked with ``--out output``, ``--checkpoint nougat`` and
    ``--markdown``; callers in this file afterwards read the result from
    ``output/<name>.mmd``.

    Args:
        file_name: Path of the PDF to process.

    Returns:
        None. A non-zero exit code is reported on stdout together with the
        captured stderr, but it is not raised, so callers behave as before.

    Raises:
        OSError: if the ``nougat`` executable cannot be started.
    """
    # Command to run
    cli_command = [
        'nougat',
        '--out', 'output',
        'pdf', f'{file_name}',
        '--checkpoint', 'nougat',
        '--markdown',
    ]

    # Run the command and capture its output
    completed_process = subprocess.run(
        cli_command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )

    # Surface failures instead of silently discarding the captured output
    if completed_process.returncode != 0:
        print(f"nougat exited with code {completed_process.returncode}")
        if completed_process.stderr:
            print(completed_process.stderr)

    return


def predict(pdf_file, pdf_link):
    """Run Nougat OCR on an uploaded PDF or on a PDF fetched from a link.

    An uploaded file takes precedence; the link is only used when no file is
    given.

    Args:
        pdf_file: Uploaded file (an object exposing the path as ``.name``, or
            the path itself as a string), or ``None`` when nothing was
            uploaded.
        pdf_link: URL of a PDF. ``None``, an empty string and a
            whitespace-only string all count as "no link".

    Returns:
        The contents of the generated ``.mmd`` file with LaTeX delimiters
        ``\\(``/``\\)`` replaced by ``$`` and ``\\[``/``\\]`` by ``$$``, or a
        short help message when neither a file nor a link was provided.

    Raises:
        FileNotFoundError: if no ``.mmd`` file exists in ``output`` for the
            processed PDF.
    """
    if pdf_file is None:
        # Normalise the link so that None (e.g. a cleared textbox) and
        # whitespace-only input are handled like an empty string.
        pdf_link = (pdf_link or '').strip()
        if not pdf_link:
            print("No file is uploaded and No link is provided")
            return "No data provided. Upload a pdf file or provide a pdf link and try again!"
        print(f'pdf_link is - {pdf_link}')
        file_name = get_pdf(pdf_link)
        print(f'file_name is - {file_name}')
    else:
        # Accept either a file object carrying its path in .name or a plain path
        file_name = getattr(pdf_file, 'name', pdf_file)
        print(file_name)

    # Call nougat
    nougat_ocr(file_name)

    # The output file shares the PDF's base name; drop only the final
    # extension instead of assuming it is exactly four characters long.
    base_name = os.path.splitext(os.path.basename(file_name))[0]
    with open(f'output/{base_name}.mmd', 'r', encoding='utf-8') as file:
        content = file.read()

    # switch math delimiters
    content = (
        content
        .replace(r'\(', '$')
        .replace(r'\)', '$')
        .replace(r'\[', '$$')
        .replace(r'\]', '$$')
    )
    return content




def nougat_ocr1(file_name):
    """Invoke the ``nougat`` CLI on *file_name*, writing into ``output``.

    Prints a trace line first, then runs the tool with the ``nougat``
    checkpoint and the ``--markdown`` flag. The process output is captured
    and discarded.

    Args:
        file_name: Path of the PDF handed to the tool.

    Returns:
        None.
    """
    print('******* inside nougat_ocr *******')

    # Assemble the argument vector piece by piece
    target_pdf = f'{file_name}'
    nougat_args = ['nougat']
    nougat_args += ['--out', 'output']
    nougat_args += ['pdf', target_pdf]
    nougat_args += ['--checkpoint', 'nougat']
    nougat_args.append('--markdown')

    # Execute the tool; stdout and stderr are piped and decoded as text
    subprocess.run(
        nougat_args,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    return None


def predict1(pdf_file):
    """Run Nougat OCR on an uploaded PDF and return the raw ``.mmd`` text.

    Args:
        pdf_file: Uploaded file object whose ``.name`` attribute holds the
            path of the PDF on disk.

    Returns:
        The contents of ``output/<pdf base name>.mmd``, unmodified.

    Raises:
        FileNotFoundError: if the expected ``.mmd`` file does not exist.
    """
    print('******* inside predict *******')
    print(f"temporary file - {pdf_file.name}")

    # Drop only the final extension. Splitting on the first '.' would cut a
    # name such as "paper.v2.pdf" down to "paper", which disagrees with how
    # predict() derives the output name.
    pdf_name = os.path.splitext(os.path.basename(pdf_file.name))[0]
    print(f"pdf file name - {pdf_name}")

    # Get prediction for a PDF using nougat
    nougat_ocr(pdf_file.name)
    print("BAACCKKK")

    # Open the multimarkdown (.mmd) file for reading
    with open(f'output/{pdf_name}.mmd', 'r', encoding='utf-8') as file:
        content = file.read()

    return content

def process_example(pdf_file, pdf_link):
    """Run ``predict`` on an example and wrap its text in a ``gr.update``."""
    return gr.update(value=predict(pdf_file, pdf_link))
      
# Custom CSS passed to gr.Blocks: gives the element with id "mkd" a fixed
# 500px height, scrollable overflow and a thin light-grey border.
css = """
  #mkd {
    height: 500px; 
    overflow: auto; 
    border: 1px solid #ccc; 
  }
"""

# Build the Gradio UI: header, file/link inputs, run/clear buttons and output pane.
with gr.Blocks(css=css) as demo:
  # Page header (closing tags fixed: the original left <center>/<h1> unclosed)
  gr.HTML("<h1><center>Nougat: Neural Optical Understanding for Academic Documents</center></h1>")
  gr.HTML("<h3><center>Lukas Blecher et al. <a href='https://arxiv.org/pdf/2308.13418.pdf' target='_blank'>Paper</a>, <a href='https://facebookresearch.github.io/nougat/'>Project</a></center></h3>")

  # Captions above the two alternative inputs
  with gr.Row():
    mkd = gr.Markdown('<h4><center>Upload a PDF</center></h4>',scale=1)
    mkd = gr.Markdown('<h4><center><i>OR</i></center></h4>',scale=1)
    mkd = gr.Markdown('<h4><center>Provide a PDF link</center></h4>',scale=1)

  # Inputs: an uploaded PDF or a link to one
  with gr.Row(equal_height=True):
    pdf_file = gr.File(label='PDF📃', file_count='single', scale=1)
    pdf_link = gr.Textbox(placeholder='Enter an Arxiv link here', label='PDF link🔗🌐', scale=1)

  with gr.Row():
    btn = gr.Button('Run NOUGAT🍫')
    clr = gr.Button('Clear🚿')

  output_headline = gr.Markdown("<h3>PDF converted to markup language through Nougat-OCR👇:</h3>")
  # elem_id ties this component to the "#mkd" rule in the css string
  parsed_output = gr.Markdown(elem_id='mkd', value='📃🔤OCR Output')

  # Run OCR on click and show the result in the output pane
  btn.click(predict, [pdf_file, pdf_link], parsed_output)
  # Reset both inputs and the output. The textbox is reset to an empty
  # string (not None) so that predict's empty-link check still matches.
  clr.click(lambda: (gr.update(value=None),
                     gr.update(value=''),
                     gr.update(value=None)),
            [],
            [pdf_file, pdf_link, parsed_output]
            )

  # gr.Examples(
  #     [["input/nougat.pdf", ""], [None, "https://arxiv.org/pdf/2308.08316.pdf"]],
  #     inputs = [pdf_file, pdf_link],
  #     outputs = parsed_output,
  #     fn=process_example,
  #     cache_examples=True,
  #     label='Click on any Examples below to get Nougat OCR results quickly:'
  # )
    
# Enable Gradio's queue on the app, then launch it with debug=True.
demo.queue()
demo.launch(debug=True)