Spaces:

MarionHalgrain
/

phi2_appli

Sleeping

phi2_appli / main.py

marion Halgrain

Update main.py

fd21c90 verified 8 months ago

699 Bytes

	from llama_cpp import LlamaModel
	from fastapi import FastAPI
	from pydantic import BaseModel

	# Charger le modèle LLaMA
	llm = LlamaModel(model_path="phi-2.Q5_K_M.gguf")

	# Pydantic object
	class Validation(BaseModel):
	prompt: str

	# Fast API
	app = FastAPI()

	@app.post("/llm_on_cpu")
	async def stream(item: Validation):
	system_prompt = 'Below is an instruction that describes a task. Write a response that appropriately completes the request.'
	E_INST = "</s>"
	user, assistant = "<\|user\|>", "<\|assistant\|>"
	prompt = f"{system_prompt}{E_INST}\n{user}\n{item.prompt}{E_INST}\n{assistant}\n"
	response = llm.generate(prompt, max_new_tokens=1096, num_threads=3)
	return response