nianlonggu
init
02ae0bf
"""
Flask app for S2ORC pdf2json utility
"""
import hashlib
from flask import Flask, request, jsonify, flash, url_for, redirect, render_template, send_file
from doc2json.grobid2json.process_pdf import process_pdf_stream
from doc2json.tex2json.process_tex import process_tex_stream
from doc2json.jats2json.process_jats import process_jats_stream
app = Flask(__name__)
ALLOWED_EXTENSIONS = {'pdf', 'gz', 'nxml'}
@app.route('/')
def home():
return render_template("home.html")
@app.route('/', methods=['POST'])
def upload_file():
uploaded_file = request.files['file']
if uploaded_file.filename != '':
filename = uploaded_file.filename
# read pdf file
if filename.endswith('pdf'):
pdf_stream = uploaded_file.stream
pdf_content = pdf_stream.read()
# compute hash
pdf_sha = hashlib.sha1(pdf_content).hexdigest()
# get results
results = process_pdf_stream(filename, pdf_sha, pdf_content)
return jsonify(results)
# read latex file
elif filename.endswith('gz'):
zip_stream = uploaded_file.stream
zip_content = zip_stream.read()
# get results
results = process_tex_stream(filename, zip_content)
return jsonify(results)
# read nxml file (jats)
elif filename.endswith('nxml'):
xml_stream = uploaded_file.stream
xml_content = xml_stream.read()
# get results
results = process_jats_stream(filename, xml_content)
return jsonify(results)
# unknown
else:
return {
"Error": "Unknown file type!"
}
return redirect(url_for('index'))
if __name__ == '__main__':
app.run(port=8080, host='0.0.0.0')