# NOTE: the original file began with "Spaces: / Sleeping / Sleeping" — Hugging Face
# Spaces page chrome captured when this code was scraped, not part of the program.
from llama_cpp import Llama
from fastapi import FastAPI
from pydantic import BaseModel

# Load the LLaMA-family model from a local GGUF file.
# llama_cpp exposes the class `Llama` (there is no `LlamaModel`), and the
# CPU thread count is configured at construction time via `n_threads`.
llm = Llama(model_path="phi-2.Q5_K_M.gguf", n_threads=3)
# Pydantic request body for the completion endpoint.
class Validation(BaseModel):
    """Incoming request payload: the raw user prompt text."""

    prompt: str  # user-supplied instruction / question
# FastAPI application instance.
app = FastAPI()
# NOTE(review): no @app.post(...) decorator is visible here, so this handler is
# never registered on `app` — confirm a route decorator exists (or add one).
async def stream(item: Validation):
    """Run a single (non-streaming) completion for the client's prompt.

    Builds a chat-style prompt (system instruction, then the user turn,
    then the assistant tag) and returns the raw completion result from
    llama_cpp.
    """
    system_prompt = 'Below is an instruction that describes a task. Write a response that appropriately completes the request.'
    E_INST = "</s>"  # end-of-turn marker used by this prompt template
    user, assistant = "<|user|>", "<|assistant|>"
    prompt = f"{system_prompt}{E_INST}\n{user}\n{item.prompt}{E_INST}\n{assistant}\n"
    # llama_cpp's high-level completion API is calling the Llama object
    # directly with a string; `Llama.generate()` expects a token sequence and
    # accepts neither `max_new_tokens` nor `num_threads` (threads are set at
    # model construction). `max_tokens` caps the number of generated tokens.
    response = llm(prompt, max_tokens=1096)
    return response