Is DeepSeek-Coder-V2-Lite-Instruct GGUF compatible with llama_cpp?
I am using this code:
from llama_cpp import Llama

# System prompt (Ukrainian): "You are a respectful assistant, you speak Ukrainian,
# briefly and concisely. Greet the user and ask the interlocutor's name."
system_prompt = "Ви шанобливий помічник, розмовляєте українською мовою, коротко та лаконічно. Привітайтесь та запитайте як зовуть співрозмовника."

model = Llama(
    # Raw string so the backslashes in the Windows path are not treated as escapes
    model_path=r"D:\lm-studio\models\bartowski\DeepSeek-Coder-V2-Lite-Instruct-GGUF\DeepSeek-Coder-V2-Lite-Instruct-Q8_0.gguf",
    #chat_format="deepseek2",
    n_gpu_layers=5,
    #flash_attn=True,
    n_threads=6,
    n_ctx=8192,
    main_gpu=0,       # Llama() has no `device` parameter; the GPU is chosen with main_gpu
    verbose=True,     # verbose=False turns the debug output off
)

messages = [{"role": "system", "content": system_prompt}]

def ai_speak(user_text: str) -> str:
    messages.append({"role": "user", "content": user_text})
    output = model.create_chat_completion(
        messages, temperature=0.5, max_tokens=1024, stream=True
    )
    llm_response = ""
    for chunk in output:
        delta = chunk["choices"][0]["delta"]
        if "content" not in delta:  # first/last chunks carry role/finish info, no text
            continue
        print(delta["content"], end="", flush=True)
        llm_response += delta["content"]
    print()
    messages.append({"role": "assistant", "content": llm_response})
    return llm_response

while True:
    ai_speak(input("> "))  # the reply is already printed while streaming
No matter what I type, the reply is always the same garbage, like "GGGGGGGGGGGGGGGGGG...".
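
To narrow down where the garbage comes from, here is a minimal check one could run (a sketch, not a fix: same GGUF path as above; the prompt string and parameter values are just examples). It calls the model as a raw completion, bypassing create_chat_completion() and its chat template, and keeps all layers on the CPU so partial GPU offload is ruled out too:

from llama_cpp import Llama

# Sketch: raw completion with no chat template, to check whether the
# "GGGG..." output also appears outside create_chat_completion().
model = Llama(
    model_path=r"D:\lm-studio\models\bartowski\DeepSeek-Coder-V2-Lite-Instruct-GGUF\DeepSeek-Coder-V2-Lite-Instruct-Q8_0.gguf",
    n_gpu_layers=0,  # CPU only for this test
    n_ctx=2048,
    verbose=True,
)

out = model("def fibonacci(n):", max_tokens=32, temperature=0.0)
print(out["choices"][0]["text"])

If this CPU-only raw completion is coherent while the GPU-offloaded chat run is not, the problem more likely lies in the offload or the chat-template handling than in the GGUF file itself.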