"""FastAPI app serving a static front end plus an image-captioning endpoint."""

from fastapi import FastAPI
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from transformers import pipeline

app = FastAPI()

# Captioning pipeline, built once at import time so every request reuses it.
# Example output:
# [{'generated_text': 'a soccer game with a player jumping to catch the ball '}]
image_to_text = pipeline(
    "image-to-text",
    model="nlpconnect/vit-gpt2-image-captioning",
)

# NOTE(review): the original also executed
#     model = AutoModelForSeq2SeqLM.from_pretrained("google/pix2struct-ocrvqa-large")
# at this point. `AutoModelForSeq2SeqLM` was never imported, so that statement
# raised NameError on import, and `model` was not used by anything visible in
# this file. It has been dropped; reintroduce it with the proper import (and
# the model class appropriate for Pix2Struct) if it is actually needed.


@app.get("/ocr")
def ocr(input: str):
    """Run the image-to-text pipeline on ``input`` and return its output.

    Args:
        input: Query-string value passed straight to the pipeline. Presumably
            an image URL or a path readable by the server -- confirm against
            the front end that calls this endpoint.

    Returns:
        Whatever the pipeline produces; per the example above, a list of
        ``{"generated_text": ...}`` dicts.
    """
    # NOTE(security): `input` is caller-controlled and is handed to the
    # pipeline unvalidated. If the pipeline resolves URLs or local paths,
    # this allows arbitrary fetches/reads from the server; consider
    # restricting what is accepted.
    result = image_to_text(input)
    print(result)
    # The original computed the result but never returned it, so the client
    # always received `null`.
    return result


# The catch-all static mount must be registered AFTER the API routes: routes
# are matched in registration order and a mount at "/" matches every path,
# which previously made /ocr unreachable.
app.mount("/", StaticFiles(directory="static", html=True), name="static")


@app.get("/")
def index() -> FileResponse:
    """Return the front-end entry page.

    NOTE(review): this route is registered after the "/" mount (as in the
    original), so the static mount appears to answer "/" first and this
    handler is likely never reached -- confirm before relying on it.
    """
    return FileResponse(path="/app/static/index.html", media_type="text/html")