from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent
from llama_cpp_agent import MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider

# Initialize the Llama model
llama_model = Llama(
    "Qwen2-1.5B-Instruct-Abliterated-iMat-GGUF/Qwen2-1.5B-Instruct-Abliterated-Q8_0.gguf",
    n_batch=1024,
    n_threads=24,
    n_gpu_layers=33,
    n_ctx=4098,
    verbose=False
)

# Create the provider
provider = LlamaCppPythonProvider(llama_model)

# Create the agent
agent = LlamaCppAgent(
    provider,
    system_prompt="You are a helpful assistant whose purpose is to help users craft and edit datasets.",
    predefined_messages_formatter_type=MessagesFormatterType.CHATML,
    debug_output=True
)

# Set provider settings
settings = provider.get_provider_default_settings()
settings.max_tokens = 2000
settings.stream = True


def send_to_llm(provider, msg_list):
    """Flatten a list of {role, content} messages into a single prompt and query the agent."""
    try:
        full_message = "\n".join([f"{msg['role']}: {msg['content']}" for msg in msg_list])
        response = agent.get_chat_response(full_message, llm_sampling_settings=settings)
        # The agent may return either a plain string or an object with a .content attribute
        if isinstance(response, str):
            return response, None
        elif hasattr(response, 'content'):
            return response.content, None
        else:
            return str(response), None
    except Exception as e:
        print(f"Error in send_to_llm: {str(e)}")
        return f"Error: {str(e)}", None
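
# A minimal usage sketch (assumption: the hypothetical message list below just
# illustrates the {role, content} dict format that send_to_llm expects; adapt it
# to your own dataset-editing prompts).
if __name__ == "__main__":
    messages = [
        {"role": "system", "content": "You help users craft and edit datasets."},
        {"role": "user", "content": "Suggest three column names for a Q&A dataset."},
    ]
    reply, _ = send_to_llm(provider, messages)
    print(reply)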