pz / pdf2zh /backend.py
github-actions[bot]
GitHub deploy: 220ad03711632606c0abccd036d638abfb47c788
7456850
from flask import Flask, request, send_file
from celery import Celery, Task
from celery.result import AsyncResult
from pdf2zh import translate_stream
import tqdm
import json
import io
from pdf2zh.doclayout import ModelInstance
from pdf2zh.config import ConfigManager
# Flask application object; the HTTP routes in this module are registered on it.
flask_app = Flask("pdf2zh")

# Celery settings are kept in the Flask config under the "CELERY" key.
# Broker and result backend are read through ConfigManager, with a local
# Redis instance passed as the fallback value.
flask_app.config.from_mapping(
    CELERY={
        "broker_url": ConfigManager.get(
            "CELERY_BROKER", "redis://127.0.0.1:6379/0"
        ),
        "result_backend": ConfigManager.get(
            "CELERY_RESULT", "redis://127.0.0.1:6379/0"
        ),
    }
)
def celery_init_app(app: Flask) -> Celery:
    """Create a Celery application tied to the given Flask application.

    The Celery app is named after ``app``, configured from
    ``app.config["CELERY"]``, made the default Celery app, and stored in
    ``app.extensions["celery"]``.

    Args:
        app: Flask application whose config and app context are used.

    Returns:
        The configured Celery application.
    """

    class FlaskTask(Task):
        # Run every task body inside the Flask application context.
        def __call__(self, *args, **kwargs):
            with app.app_context():
                return self.run(*args, **kwargs)

    worker_app = Celery(app.name)
    worker_app.config_from_object(app.config["CELERY"])
    worker_app.Task = FlaskTask
    worker_app.set_default()
    worker_app.autodiscover_tasks()
    # Expose the Celery app on the Flask app.
    app.extensions["celery"] = worker_app
    return worker_app
# Module-level Celery application built from flask_app; the task decorator
# and the AsyncResult lookups in the routes below go through it.
celery_app = celery_init_app(flask_app)
@celery_app.task(bind=True)
def translate_task(
    self: Task,
    stream: bytes,
    args: dict,
):
    """Celery task that translates one PDF through ``translate_stream``.

    Args:
        self: The bound task instance, used to publish progress updates.
        stream: Raw content of the uploaded file.
        args: Extra keyword arguments forwarded to ``translate_stream``.

    Returns:
        The ``(doc_mono, doc_dual)`` pair produced by ``translate_stream``.
    """

    def report_progress(bar: tqdm.tqdm):
        # Publish the bar's counters as task metadata so the status
        # endpoint can report them while the task is in PROGRESS.
        progress = {"n": bar.n, "total": bar.total}
        self.update_state(state="PROGRESS", meta=progress)
        print(f"Translating {bar.n} / {bar.total} pages")

    mono, dual = translate_stream(
        stream,
        callback=report_progress,
        model=ModelInstance.value,
        **args,
    )
    return mono, dual
@flask_app.route("/v1/translate", methods=["POST"])
def create_translate_tasks():
    """Queue a translation task for an uploaded file.

    Expects a multipart form with a ``file`` part and a ``data`` field.
    ``data`` must hold a JSON object; it is forwarded to ``translate_task``
    and unpacked there as keyword arguments.

    Returns:
        ``{"id": <task id>}`` once the task is queued, or
        ``({"error": <message>}, 400)`` when ``data`` is missing, is not
        valid JSON, or is not a JSON object.
    """
    # A missing "file" part raises Werkzeug's bad-request KeyError, which
    # Flask already answers with a 400.
    file = request.files["file"]
    stream = file.stream.read()

    raw_args = request.form.get("data")
    print(raw_args)
    if raw_args is None:
        # json.loads(None) would raise TypeError and surface as a 500.
        return {"error": "missing form field 'data'"}, 400
    try:
        args = json.loads(raw_args)
    except json.JSONDecodeError:
        return {"error": "form field 'data' is not valid JSON"}, 400
    if not isinstance(args, dict):
        # The worker unpacks args with ** and needs a mapping, so reject
        # other JSON values here instead of queuing a task that must fail.
        return {"error": "form field 'data' must be a JSON object"}, 400

    task = translate_task.delay(stream, args)
    return {"id": task.id}
@flask_app.route("/v1/translate/<id>", methods=["GET"])
def get_translate_task(id: str):
    """Report the current state of a translation task.

    Args:
        id: Celery task id taken from the URL.

    Returns:
        ``{"state": ...}``, with an extra ``"info"`` entry holding the
        task's metadata while the state is ``"PROGRESS"``.
    """
    task: AsyncResult = celery_app.AsyncResult(id)
    if str(task.state) != "PROGRESS":
        return {"state": str(task.state)}
    # Only in-progress tasks expose their info payload.
    return {"state": str(task.state), "info": task.info}
@flask_app.route("/v1/translate/<id>", methods=["DELETE"])
def delete_translate_task(id: str):
    """Revoke a translation task and report its state afterwards.

    Args:
        id: Celery task id taken from the URL.

    Returns:
        ``{"state": ...}`` with the task state read after the revoke call.
    """
    task: AsyncResult = celery_app.AsyncResult(id)
    # terminate=True is passed so the revoke also applies to a task that
    # has already started.
    task.revoke(terminate=True)
    state_after = str(task.state)
    return {"state": state_after}
@flask_app.route("/v1/translate/<id>/<format>")
def get_translate_result(id: str, format: str):
    """Send back one of the two documents produced by a finished task.

    Args:
        id: Celery task id taken from the URL.
        format: ``"mono"`` selects the first document of the task result;
            any other value selects the second (dual) one.

    Returns:
        The selected document as an ``application/pdf`` response, or
        ``({"error": ...}, 400)`` when the task is not finished or did not
        succeed.
    """
    task = celery_app.AsyncResult(id)
    if not task.ready():
        return {"error": "task not finished"}, 400
    if not task.successful():
        return {"error": "task failed"}, 400

    mono_pdf, dual_pdf = task.get()
    if format == "mono":
        payload = mono_pdf
    else:
        payload = dual_pdf
    # Wrap the payload in an in-memory file object for send_file.
    return send_file(io.BytesIO(payload), mimetype="application/pdf")
# Start the Flask server when this module is run directly as a script.
if __name__ == "__main__":
    flask_app.run()