# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("TheBloke/Llama-2-70B-Chat-fp16")
model = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-70B-Chat-fp16")

input_text = "hello how are you?"
inputs = tokenizer.encode(input_text, return_tensors="pt")

# Sample 5 completions; do_sample=True is required for temperature to take effect
# and for num_return_sequences > 1 outside of beam search.
outputs = model.generate(inputs, max_length=50, num_return_sequences=5, do_sample=True, temperature=0.7)

print("Generated Text")
for i, output in enumerate(outputs):
    print(f"{i}: {tokenizer.decode(output, skip_special_tokens=True)}")
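
# Note: as written above, from_pretrained() loads the 70B checkpoint in the
# default float32 precision, which needs roughly 280 GB of memory. A common
# alternative (a sketch, assuming `torch` and `accelerate` are installed) is to
# keep the fp16 weights in half precision and let device_map="auto" shard the
# layers across whatever GPUs are available:
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Llama-2-70B-Chat-fp16",
    torch_dtype=torch.float16,  # load the fp16 checkpoint without upcasting to float32
    device_map="auto",          # requires `accelerate`; places layers on available devices
)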