from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the model and tokenizer
model_name = "meta-llama/Llama-3.2-1B"  # Replace with your model's name
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Define the refined prompt
prompt = (
    "You are a professional tour guide specializing in Saudi Arabia. Respond to questions about tourism with accurate, "
    "structured, and concise answers. Avoid unnecessary details and maintain a professional tone.\n\n"
    "Question: List the top tourist destinations in Saudi Arabia with a brief description.\nAnswer:\n"
)

# Tokenize the input
inputs = tokenizer(prompt, return_tensors="pt")

# Generate a response with optimized parameters
# do_sample=True is required for temperature, top_k, and top_p to take effect
outputs = model.generate(
    **inputs,
    max_new_tokens=120,
    do_sample=True,
    temperature=0.5,
    top_k=30,
    top_p=0.85,
    repetition_penalty=1.5,
    pad_token_id=tokenizer.eos_token_id,  # Llama has no pad token; reuse EOS to avoid a warning
)

# Decode and display the response
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("LLaMA Response:", response)