|
from flask import Flask, request, send_file |
|
from celery import Celery, Task |
|
from celery.result import AsyncResult |
|
from pdf2zh import translate_stream |
|
import tqdm |
|
import json |
|
import io |
|
from pdf2zh.doclayout import ModelInstance |
|
from pdf2zh.config import ConfigManager |
|
|
|
# Flask application exposing the translation HTTP API.
flask_app = Flask("pdf2zh")

# Celery settings live under the "CELERY" config key; celery_init_app() reads
# them from there. Both URLs come from ConfigManager, with a local Redis
# database passed as the second argument (presumably the fallback when the
# key is not configured -- confirm against ConfigManager.get).
flask_app.config.from_mapping(
    CELERY={
        "broker_url": ConfigManager.get(
            "CELERY_BROKER", "redis://127.0.0.1:6379/0"
        ),
        "result_backend": ConfigManager.get(
            "CELERY_RESULT", "redis://127.0.0.1:6379/0"
        ),
    },
)
|
|
|
|
|
def celery_init_app(app: Flask) -> Celery:
    """Build a Celery application wired to the given Flask application.

    The Celery app is named after ``app``, configured from
    ``app.config["CELERY"]``, given a task base class that runs every task
    inside ``app.app_context()``, and stored in ``app.extensions["celery"]``.

    Args:
        app: Flask application whose config holds a ``"CELERY"`` mapping.

    Returns:
        The configured Celery application.
    """

    class FlaskTask(Task):
        """Task base class that executes the task body in the Flask app context."""

        def __call__(self, *args, **kwargs):
            # Entering the app context lets task code use Flask facilities
            # that require an active application.
            with app.app_context():
                return self.run(*args, **kwargs)

    celery_settings = app.config["CELERY"]

    celery = Celery(app.name)
    celery.config_from_object(celery_settings)
    celery.Task = FlaskTask
    celery.set_default()
    celery.autodiscover_tasks()

    # Keep a handle on the Flask side so the Celery app can be looked up
    # from the Flask application object.
    app.extensions["celery"] = celery
    return celery
|
|
|
|
|
# Module-level Celery application bound to flask_app; the task and the HTTP
# routes in this module use it.
celery_app = celery_init_app(flask_app)
|
|
|
|
|
@celery_app.task(bind=True)
def translate_task(
    self: Task,
    stream: bytes,
    args: dict,
):
    """Translate a document in a Celery worker and report progress.

    Args:
        self: The bound Celery task, used to publish progress updates.
        stream: Raw content of the document to translate.
        args: Extra keyword arguments forwarded to ``translate_stream``.

    Returns:
        A ``(doc_mono, doc_dual)`` tuple as produced by ``translate_stream``.
    """

    def report_progress(bar: tqdm.tqdm):
        # Publish the current counters as a custom "PROGRESS" state so the
        # status endpoint can expose them, then echo them to stdout.
        progress = {"n": bar.n, "total": bar.total}
        self.update_state(state="PROGRESS", meta=progress)
        print(f"Translating {bar.n} / {bar.total} pages")

    mono, dual = translate_stream(
        stream,
        callback=report_progress,
        model=ModelInstance.value,
        **args,
    )
    return mono, dual
|
|
|
|
|
@flask_app.route("/v1/translate", methods=["POST"])
def create_translate_tasks():
    """Queue a translation task for an uploaded document.

    Expects a form upload with a ``file`` part (the document) and a ``data``
    field containing a JSON object; that object is handed to
    ``translate_task`` as its ``args`` parameter.

    Returns:
        ``{"id": <task id>}`` when the task was queued, or
        ``({"error": <message>}, 400)`` when the upload or the ``data`` field
        is missing or malformed.
    """
    upload = request.files.get("file")
    if upload is None:
        return {"error": "missing 'file' upload"}, 400

    raw_args = request.form.get("data")
    print(raw_args)
    if raw_args is None:
        # json.loads(None) would raise TypeError and surface as a 500.
        return {"error": "missing 'data' form field"}, 400

    try:
        args = json.loads(raw_args)
    except json.JSONDecodeError:
        return {"error": "'data' is not valid JSON"}, 400

    # translate_task expands args with **, so anything other than a JSON
    # object would only fail later inside the worker; reject it up front.
    if not isinstance(args, dict):
        return {"error": "'data' must be a JSON object"}, 400

    task = translate_task.delay(upload.stream.read(), args)
    return {"id": task.id}
|
|
|
|
|
@flask_app.route("/v1/translate/<id>", methods=["GET"])
def get_translate_task(id: str):
    """Report the current state of a translation task.

    Args:
        id: Celery task id taken from the URL.

    Returns:
        ``{"state": <state>}``; while the task is in the custom ``PROGRESS``
        state the response also carries ``"info"`` with the task's
        ``result.info``.
    """
    result: AsyncResult = celery_app.AsyncResult(id)

    # Read the state once: each access may query the result backend again, so
    # reading it twice could report a state that differs from the one the
    # branch below was chosen on.
    state = str(result.state)

    response = {"state": state}
    if state == "PROGRESS":
        response["info"] = result.info
    return response
|
|
|
|
|
@flask_app.route("/v1/translate/<id>", methods=["DELETE"])
def delete_translate_task(id: str):
    """Revoke a translation task and report its state afterwards.

    Args:
        id: Celery task id taken from the URL.

    Returns:
        ``{"state": <state>}`` read after the revoke request was issued.
    """
    task: AsyncResult = celery_app.AsyncResult(id)
    # terminate=True asks for the task to be terminated as well, not only
    # revoked.
    task.revoke(terminate=True)
    state_after_revoke = str(task.state)
    return {"state": state_after_revoke}
|
|
|
|
|
@flask_app.route("/v1/translate/<id>/<format>")
def get_translate_result(id: str, format: str):
    """Send back one of the two documents produced by a finished task.

    Args:
        id: Celery task id taken from the URL.
        format: ``"mono"`` selects the first document of the task result;
            any other value selects the second one.

    Returns:
        The selected document as an ``application/pdf`` response, or
        ``({"error": <message>}, 400)`` if the task has not finished or did
        not succeed.
    """
    task = celery_app.AsyncResult(id)

    # Guard clauses: only a finished, successful task has a result to send.
    if not task.ready():
        return {"error": "task not finished"}, 400
    if not task.successful():
        return {"error": "task failed"}, 400

    mono_doc, dual_doc = task.get()
    if format == "mono":
        payload = mono_doc
    else:
        payload = dual_doc

    buffer = io.BytesIO(payload)
    return send_file(buffer, mimetype="application/pdf")
|
|
|
|
|
# Run the Flask application directly when this file is executed as a script.
if __name__ == "__main__":
    flask_app.run()
|
|