cpu_inf / main.py
from fastapi import FastAPI
from pydantic import BaseModel
from ctransformers import AutoModelForCausalLM

# Load the quantized TinyLlama chat model (GGUF, q4_0) for CPU inference.
llm = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-Chat-v0.6",
    model_file="ggml-model-q4_0.gguf",
)
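# Note: the loaded model is a plain callable; ctransformers generation
# options such as max_new_tokens or temperature can be passed per call
# (the values below are illustrative assumptions, not part of this file), e.g.:
#   llm("Hello", max_new_tokens=64, temperature=0.7)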
# Pydantic model describing the request body: a single prompt string.
class Validation(BaseModel):
    prompt: str

# FastAPI app
app = FastAPI()
@app.post("/llm_on_cpu")
async def stream(item: Validation):
    # Wrap the incoming prompt in the TinyLlama chat template.
    prefix = "<|user|>\n"
    suffix = "<|endoftext|><|assistant|>"
    prompt = f"{prefix}{item.prompt}{suffix}"
    return llm(prompt)
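
# --- Example usage (a minimal sketch; host, port, and the sample prompt are
# assumptions, not part of the original file) ---
# Start the server:
#   uvicorn main:app --host 0.0.0.0 --port 8000
# Query the endpoint:
#   curl -X POST http://localhost:8000/llm_on_cpu \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "What is FastAPI?"}'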