import sys

from llama_cpp import Llama

# Path to the GGUF model file
MODEL_PATH = "llama-3.1-8B.gguf"

# Load the model
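# n_ctx is the context window size in tokens; n_threads is the number of CPU threads used for inference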
print("Loading the model...")
try:
    llama = Llama(model_path=MODEL_PATH, n_ctx=1024, n_threads=4)
    print("Model loaded successfully!")
except Exception as e:
    print(f"Failed to load the model: {e}")
    sys.exit(1)

# Chat loop
print("Chat with the model! Type 'exit' to end the conversation.")
while True:
    user_input = input("You: ").strip()
    if user_input.lower() == "exit":
        print("Exiting chat. Goodbye!")
        break
    if not user_input:
        continue  # ignore empty input instead of querying the model

    # Query the model
    print("Thinking...")
    response = llama(
        user_input,
        max_tokens=50,       # Limit response length
        temperature=0.7,     # Control randomness
        top_p=0.9,           # Top-p sampling
        stop=["You:"]        # Stop at the next user prompt
    )

    # Extract and clean response text
    response_text = response['choices'][0]['text'].strip()
    print(f"Model: {response_text}")