# Web-Chatbot / Model.py — llama_cpp model loading and text-generation helpers.
# Model.py
import os
from llama_cpp import Llama
def load_model(path, n_ctx=2048, n_gpu_layers=0, n_threads=None):
    """Build a llama_cpp ``Llama`` instance for the GGUF model at *path*.

    When *n_threads* is omitted, the thread count is taken from the
    ``NUM_THREADS`` environment variable (falling back to "4").
    """
    if n_threads is None:
        n_threads = int(os.environ.get("NUM_THREADS", "4"))
    model = Llama(
        model_path=path,
        n_ctx=n_ctx,
        n_gpu_layers=n_gpu_layers,
        n_threads=n_threads,
    )
    return model
def generate(llm, prompt, max_tokens=384, temperature=0.2, top_p=0.9, stop=None):
    """Run a completion on *llm* for *prompt* and return the stripped text.

    A falsy *stop* value is normalized to an empty list before the call.
    """
    response = llm(
        prompt,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stop=stop or [],
    )
    first_choice = response["choices"][0]
    return first_choice["text"].strip()