from llama_cpp import Llama

# Note: manual Llama 3 chat-template markers such as <|begin_of_text|>,
# <|start_header_id|>system<|end_header_id|> and <|eot_id|> are not needed
# here; create_chat_completion applies the model's chat template automatically.

# Download the quantized GGUF weights from the Hugging Face Hub and load them.
model = Llama.from_pretrained(
    repo_id="Arpit-Bansal/counsellor_model_q5_k_m",
    filename="counsellor_model_q5_k_m-unsloth.Q5_K_M.gguf",
    verbose=False,
)


def prompt_for_chat(content: str):
    """Wrap the user's message in a system/user chat-message list."""
    return [
        {
            "role": "system",
            "content": (
                "You are an excellent counselor who assists users with their "
                "mental health, educational challenges, and everyday life "
                "issues. You provide thoughtful answers to their questions."
            ),
        },
        {"role": "user", "content": content},
    ]


def response_return(response):
    """Concatenate the streamed completion chunks into a single string."""
    res = ""
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        # Skip chunks that carry no text (e.g. the initial role-only delta).
        if "content" not in delta:
            continue
        res += delta["content"]
    return res


def llm_function(user_input: str):
    """Generate a counselor response for a single user message."""
    llm_response = model.create_chat_completion(
        messages=prompt_for_chat(content=user_input),
        stream=True,
        temperature=0.6,
        max_tokens=256,
    )
    return response_return(llm_response)
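
# A minimal usage sketch (not in the original file): running the module
# directly exercises llm_function once with a sample question. It assumes the
# GGUF weights can be fetched from the Hugging Face Hub on first run, which
# requires network access and may take a while.
if __name__ == "__main__":
    sample_question = "How can I stay focused while preparing for exams?"
    print(llm_function(sample_question))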