# Minimal text-generation example: load a causal LM and its tokenizer from a
# Hub checkpoint, sample a continuation of a fixed prompt, and print it.
from transformers import AutoTokenizer, AutoModelForCausalLM

checkpoint = 'alexghergh/gpt1'

# SECURITY NOTE: trust_remote_code=True lets Transformers execute Python code
# shipped inside the checkpoint repository. Only keep it for repositories you
# have reviewed; consider pinning a specific `revision` so the executed code
# cannot change underneath you.
model = AutoModelForCausalLM.from_pretrained(checkpoint, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)

prompt = 'The mastermind behind the plan was, all along, '

# return_tensors='pt' asks the tokenizer for PyTorch tensors.
inputs = tokenizer(prompt, return_tensors='pt')

# Only the token ids are forwarded to generate().
# NOTE(review): the attention mask is not passed; confirm whether this
# checkpoint's custom model code accepts one before switching to `**inputs`.
# do_sample=True makes the output vary from run to run; num_beams=1 disables
# beam search, so this is plain sampling with top_p / temperature.
generate_ids = model.generate(
    inputs.input_ids,
    max_new_tokens=40,
    num_beams=1,
    do_sample=True,
    top_p=0.9,
    temperature=0.8,
)

# A single prompt was encoded, so only the first decoded sequence is printed.
print(
    tokenizer.batch_decode(
        generate_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0]
)