|
import threading

from ctransformers import AutoModelForCausalLM
from fastapi import FastAPI, Form
from fastapi.concurrency import run_in_threadpool
from pydantic import BaseModel
|
|
|
|
|
# Options forwarded to ctransformers when the model is loaded.
_LLM_OPTIONS = {
    "max_new_tokens": 1096,
    "threads": 3,
}

# The quantised Llama 3 Instruct model is loaded once, at import time, and the
# resulting object is shared by everything in this module that calls `llm`.
llm = AutoModelForCausalLM.from_pretrained(
    "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf",
    **_LLM_OPTIONS,
)
|
|
|
|
|
|
|
class validation(BaseModel):
    """Request body schema for the text-generation endpoint."""

    # Raw user instruction; the endpoint strips surrounding whitespace
    # before inserting it into the model prompt.
    prompt: str
|
|
|
|
|
# ASGI application object; the route handlers in this module register on it.
app = FastAPI()
|
|
|
|
|
# Fixed instruction placed in the system turn of every request.
_SYSTEM_PROMPT = (
    'Below is an instruction that describes a task. '
    'Write a response that appropriately completes the request.'
)

# Generation now runs in worker threads, so several requests could otherwise
# reach the shared model at once.  The lock keeps access strictly one at a
# time, which is what the original (event-loop-blocking) code did implicitly.
_llm_lock = threading.Lock()


def _build_llama3_prompt(system_prompt: str, user_prompt: str) -> str:
    """Render one system turn and one user turn in Llama 3 Instruct format.

    Each header is followed by a blank line and each turn ends with
    ``<|eot_id|>``.  The prompt finishes with an open assistant header so the
    model continues with its reply.  Nothing precedes ``<|begin_of_text|>``.
    """
    return (
        "<|begin_of_text|>"
        "<|start_header_id|>system<|end_header_id|>\n\n"
        f"{system_prompt}<|eot_id|>"
        "<|start_header_id|>user<|end_header_id|>\n\n"
        f"{user_prompt}<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\n\n"
    )


def _generate(prompt: str):
    """Run the shared model on *prompt*, one caller at a time."""
    with _llm_lock:
        return llm(prompt)


@app.post("/llm_on_cpu")
async def stream(item: validation):
    """Generate a completion for the instruction in ``item.prompt``.

    Args:
        item: Parsed request body; its ``prompt`` is stripped of surrounding
            whitespace and placed in the user turn.

    Returns:
        Whatever ``llm(prompt)`` returns for the rendered prompt.
    """
    # NOTE(review): the user text is inserted verbatim, so a caller can embed
    # Llama 3 control tokens (e.g. <|eot_id|>) in it.  Filter them here if
    # callers are untrusted.
    prompt = _build_llama3_prompt(_SYSTEM_PROMPT, item.prompt.strip())

    # Inference is blocking CPU work; calling it directly inside an
    # ``async def`` handler would freeze the event loop (and every other
    # request) until generation finished.
    return await run_in_threadpool(_generate, prompt)
|
|