llama_cpp compatible?

#8
by Techn0man1ac

I'm using this code:

from llama_cpp import Llama

# System prompt in Ukrainian: "You are a respectful assistant, you speak
# Ukrainian, briefly and concisely. Greet the user and ask their name."
user_prompt = "Ви шанобливий помічник, розмовляєте українською мовою, коротко та лаконічно. Привітайтесь та запитайте як зовуть співрозмовника."

model = Llama(
    # raw string, otherwise "\b" in "\bartowski" is parsed as a backspace escape
    model_path=r"D:\lm-studio\models\bartowski\DeepSeek-Coder-V2-Lite-Instruct-GGUF\DeepSeek-Coder-V2-Lite-Instruct-Q8_0.gguf",
    #chat_format="deepseek2",
    #flash_attn=True,
    n_gpu_layers=5,  # offload 5 layers to the GPU; llama-cpp-python has no
                     # device="cuda:0" argument, GPU use is controlled via
                     # n_gpu_layers (and main_gpu on multi-GPU setups)
    n_threads=6,
    n_ctx=8192,
    verbose=True,  # verbose=False turns the debug output off
)

messages = [{"role": "system", "content": user_prompt}]

def AI_speak(userText: str) -> str:
    messages.append({"role": "user", "content": userText})

    output = model.create_chat_completion(
        messages, temperature=0.5, max_tokens=1024, stream=True
    )

    # collect the streamed reply while printing it token by token
    LLM_Response = ""
    for chunk in output:
        delta = chunk["choices"][0]["delta"]
        if "content" not in delta:
            continue
        print(delta["content"], end="", flush=True)
        LLM_Response += delta["content"]
    print()

    messages.append({"role": "assistant", "content": LLM_Response})
    return LLM_Response

while True:
    # the reply is already streamed to stdout inside AI_speak,
    # so there is no need to print the return value again
    AI_speak(input("> "))
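Since `chat_format` is left as None, I also checked which template actually gets applied. This is a minimal sketch, assuming a recent llama-cpp-python that falls back to the Jinja chat template embedded in the GGUF metadata when `chat_format` is not set:

```python
# Hedged check: with chat_format=None, recent llama-cpp-python versions try to
# use the chat template stored in the GGUF metadata. Printing it shows which
# template create_chat_completion will actually apply.
print(model.metadata.get("tokenizer.chat_template", "<no embedded chat template>"))
```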

And the answer is always the same, something like "GGGGGGGGGGGGGGGGGG...".
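To separate a chat-template problem from a model/offload problem, a raw completion that bypasses the template can help. A sketch with a hypothetical prompt, using the same `model` object as above:

```python
# Sanity check: call the model directly (create_completion), so no chat
# template is applied. If this also prints "GGGG...", the chat template
# is not the culprit and the issue is in the model/offload setup.
raw = model("Hello, who are you?", max_tokens=32, temperature=0.5)
print(raw["choices"][0]["text"])
```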

