from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the model and tokenizer
model_name = "meta-llama/Llama-3.2-1B"  # Replace with your model's name
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Define the refined prompt
prompt = (
    "You are a professional tour guide specializing in Saudi Arabia. Respond to questions about tourism with accurate, "
    "structured, and concise answers. Avoid unnecessary details and maintain a professional tone.\n\n"
    "Question: List the top tourist destinations in Saudi Arabia with a brief description.\nAnswer:\n"
)

# Tokenize the input
inputs = tokenizer(prompt, return_tensors="pt")

# Generate a response with tuned sampling parameters.
# do_sample=True is required for temperature/top_k/top_p to take effect;
# without it, generate() uses greedy decoding and ignores those settings.
outputs = model.generate(
    **inputs,
    max_new_tokens=120,          # cap the length of the generated answer
    do_sample=True,              # enable sampling so the settings below apply
    temperature=0.5,             # lower temperature -> more focused output
    top_k=30,                    # sample only from the 30 most likely tokens
    top_p=0.85,                  # nucleus sampling cutoff
    repetition_penalty=1.5,      # discourage repeated phrases
    pad_token_id=tokenizer.eos_token_id,  # Llama has no pad token; reuse EOS
)
# Decode only the newly generated tokens (skip the echoed prompt) and display
response = tokenizer.decode(
    outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
)
print("LLaMA Response:", response)