import base64
from functools import lru_cache

from fastapi import FastAPI, HTTPException
from PIL import Image
from pydantic import BaseModel
import uvicorn
from llama_cpp import Llama
from llama_cpp.llama_chat_format import MoondreamChatHandler

app = FastAPI()


class RequestData(BaseModel):
    """Request body for POST /query.

    prompt: the user's question/instruction about the image.
    image: the image content, base64-encoded (assumed PNG — the data URI
        below hard-codes image/png; TODO confirm callers always send PNG).
    """

    prompt: str
    image: str


@lru_cache(maxsize=1)
def load_model() -> Llama:
    """Load the Moondream2 vision-language model, cached as a lazy singleton.

    The original code called this on every request, re-loading the
    multi-gigabyte model each time; ``lru_cache(maxsize=1)`` ensures the
    expensive ``from_pretrained`` downloads/loads happen exactly once per
    process, on first use.
    """
    chat_handler = MoondreamChatHandler.from_pretrained(
        repo_id="vikhyatk/moondream2",
        filename="*mmproj*",
    )
    llm = Llama.from_pretrained(
        repo_id="vikhyatk/moondream2",
        filename="*text-model*",
        chat_handler=chat_handler,
        n_ctx=2048,  # n_ctx increased to accommodate the image embedding
    )
    return llm


@app.get("/")
def greet_json():
    """Health-check / landing endpoint."""
    return {"Hello": "World!"}


@app.post("/query")
def query(data: RequestData):
    """Run the vision model on a base64 image with the given prompt.

    Returns ``{"response": <model output text>}`` on success; any failure
    during inference is surfaced as an HTTP 500 with the error message.
    """
    llm = load_model()

    # Embed the base64 payload directly in a data URI — llama-cpp's
    # multimodal chat handler fetches the image from this URL field.
    data_uri = f"data:image/png;base64,{data.image}"
    messages = [
        {
            "role": "system",
            "content": "You are an assistant who perfectly describes images.",
        },
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": data_uri}},
                {"type": "text", "text": data.prompt},
            ],
        },
    ]

    # Keep the try body minimal: only the inference call and response
    # unpacking can realistically raise here.
    try:
        response = llm.create_chat_completion(messages=messages)
        output = response["choices"][0]["message"]["content"]
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e

    return {"response": str(output)}


if __name__ == "__main__":
    uvicorn.run("app:app")