Spaces:
Build error
Build error
""" | |
Flask app for S2ORC pdf2json utility | |
""" | |
import hashlib | |
from flask import Flask, request, jsonify, flash, url_for, redirect, render_template, send_file | |
from doc2json.grobid2json.process_pdf import process_pdf_stream | |
from doc2json.tex2json.process_tex import process_tex_stream | |
from doc2json.jats2json.process_jats import process_jats_stream | |
app = Flask(__name__) | |
ALLOWED_EXTENSIONS = {'pdf', 'gz', 'nxml'} | |
def home(): | |
return render_template("home.html") | |
def upload_file(): | |
uploaded_file = request.files['file'] | |
if uploaded_file.filename != '': | |
filename = uploaded_file.filename | |
# read pdf file | |
if filename.endswith('pdf'): | |
pdf_stream = uploaded_file.stream | |
pdf_content = pdf_stream.read() | |
# compute hash | |
pdf_sha = hashlib.sha1(pdf_content).hexdigest() | |
# get results | |
results = process_pdf_stream(filename, pdf_sha, pdf_content) | |
return jsonify(results) | |
# read latex file | |
elif filename.endswith('gz'): | |
zip_stream = uploaded_file.stream | |
zip_content = zip_stream.read() | |
# get results | |
results = process_tex_stream(filename, zip_content) | |
return jsonify(results) | |
# read nxml file (jats) | |
elif filename.endswith('nxml'): | |
xml_stream = uploaded_file.stream | |
xml_content = xml_stream.read() | |
# get results | |
results = process_jats_stream(filename, xml_content) | |
return jsonify(results) | |
# unknown | |
else: | |
return { | |
"Error": "Unknown file type!" | |
} | |
return redirect(url_for('index')) | |
if __name__ == '__main__': | |
app.run(port=8080, host='0.0.0.0') | |