from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

def main():
    # Load the fine-tuned model and tokenizer
    model_output_dir = "/Users/migueldeguzman/Desktop/gpt2xl_algos/falcon-1b/v10/"  # Replace with your fine-tuned model directory
    tokenizer = AutoTokenizer.from_pretrained(model_output_dir)
    model = AutoModelForCausalLM.from_pretrained(model_output_dir)

    while True:
        # User input for text generation prompt
        prompt = input("Enter a prompt for text generation (or type 'exit' to quit): ")
        if prompt.lower() == 'exit':
            break

        # Encode the prompt and generate text
        input_ids = tokenizer.encode(prompt, return_tensors="pt")
        attention_mask = torch.ones(input_ids.shape, dtype=torch.long)  # Create an attention mask as a tensor

        output = model.generate(
            input_ids,
            attention_mask=attention_mask,  # Include the attention mask
            max_length=1024,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.000000001
        )

        # Decode and print the generated text
        generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
        print("Generated Text:")
        print(generated_text)

if __name__ == "__main__":
    main()
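
# A minimal sketch, not part of the original script: with do_sample=True and a
# near-zero temperature, softmax concentrates essentially all probability mass
# on the top-ranked token, so the generate() call above behaves like greedy
# decoding. The hypothetical helper below (name and signature are assumptions)
# shows the equivalent deterministic call using only standard transformers
# arguments; it reuses the AutoModelForCausalLM/AutoTokenizer imports above.

def generate_greedy(prompt: str, model_dir: str, max_length: int = 1024) -> str:
    """Greedy-decoding equivalent of near-zero-temperature sampling."""
    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    model = AutoModelForCausalLM.from_pretrained(model_dir)
    # tokenizer(...) returns both input_ids and attention_mask in one dict
    inputs = tokenizer(prompt, return_tensors="pt")
    output = model.generate(
        **inputs,
        max_length=max_length,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        do_sample=False,  # greedy: always pick the highest-probability next token
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)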