Spaces:
Build error
Build error
File size: 1,819 Bytes
02ae0bf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
"""
Flask app for S2ORC pdf2json utility
"""
import hashlib
from flask import Flask, request, jsonify, flash, url_for, redirect, render_template, send_file
from doc2json.grobid2json.process_pdf import process_pdf_stream
from doc2json.tex2json.process_tex import process_tex_stream
from doc2json.jats2json.process_jats import process_jats_stream
# Flask application object; the @app.route decorators in this file register
# their view functions on it, and the __main__ guard runs it.
app = Flask(__name__)
# File-name suffixes that the upload handler has a converter for.
# NOTE(review): this set is not referenced anywhere in the visible code -- the
# upload handler checks each suffix inline; confirm whether it should be used.
ALLOWED_EXTENSIONS = {'pdf', 'gz', 'nxml'}
@app.route('/')
def home():
    """Serve the landing page by rendering the ``home.html`` template."""
    page = render_template("home.html")
    return page
@app.route('/', methods=['POST'])
def upload_file():
    """Convert an uploaded document to JSON and return the result.

    Reads the multipart part named ``file`` from the current request and
    picks a converter from the suffix of the uploaded file name:

    * ``pdf``  -> ``process_pdf_stream`` (also given the SHA-1 of the bytes)
    * ``gz``   -> ``process_tex_stream``
    * ``nxml`` -> ``process_jats_stream``

    Returns:
        A JSON response with the converter's result; a JSON object
        ``{"Error": "Unknown file type!"}`` for any other suffix; or a
        redirect to the landing page when no file was selected.

    Note:
        ``request.files['file']`` is a plain key lookup, so a request
        without a ``file`` part is rejected by Flask before any of the
        logic below runs.
    """
    uploaded_file = request.files['file']
    filename = uploaded_file.filename

    # No file chosen (empty or absent file name): go back to the upload form.
    # The landing page endpoint is named after its view function, ``home``;
    # there is no ``index`` endpoint, so url_for('index') cannot be built.
    if not filename:
        return redirect(url_for('home'))

    # read pdf file
    if filename.endswith('pdf'):
        pdf_content = uploaded_file.stream.read()
        # compute hash of the raw bytes; passed to the converter with the data
        pdf_sha = hashlib.sha1(pdf_content).hexdigest()
        results = process_pdf_stream(filename, pdf_sha, pdf_content)
        return jsonify(results)

    # read latex file
    if filename.endswith('gz'):
        zip_content = uploaded_file.stream.read()
        results = process_tex_stream(filename, zip_content)
        return jsonify(results)

    # read nxml file (jats)
    if filename.endswith('nxml'):
        xml_content = uploaded_file.stream.read()
        results = process_jats_stream(filename, xml_content)
        return jsonify(results)

    # unknown
    return {
        "Error": "Unknown file type!"
    }
if __name__ == '__main__':
    # Executed directly as a script: start Flask's built-in server on
    # port 8080, bound to the 0.0.0.0 address.
    app.run(host='0.0.0.0', port=8080)
|