from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

def main():
    # Load the fine-tuned model and tokenizer
    model_output_dir = "/Users/migueldeguzman/Desktop/gpt2xl_algos/falcon-1b/v10/"  # Replace with your fine-tuned model directory
    tokenizer = AutoTokenizer.from_pretrained(model_output_dir)
    model = AutoModelForCausalLM.from_pretrained(model_output_dir)

    while True:
        # User input for text generation prompt
        prompt = input("Enter a prompt for text generation (or type 'exit' to quit): ")
        if prompt.lower() == 'exit':
            break

        # Encode the prompt and generate text
        input_ids = tokenizer.encode(prompt, return_tensors="pt")
        attention_mask = torch.ones(input_ids.shape, dtype=torch.long)  # Create an attention mask as a tensor

        output = model.generate(
            input_ids,
            attention_mask=attention_mask,  # Include the attention mask
            max_length=1024,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.000000001
        )

        # Decode and print the generated text
        generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
        print("Generated Text:")
        print(generated_text)

if __name__ == "__main__":
    main()
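
# A minimal sketch, not part of the original script: with do_sample=True and a
# near-zero temperature, softmax concentrates essentially all probability mass
# on the top-ranked token, so the generate() call above behaves like greedy
# decoding. The hypothetical helper below (name and signature are assumptions)
# shows the equivalent deterministic call using only standard transformers
# arguments; it reuses the AutoModelForCausalLM/AutoTokenizer imports above.

def generate_greedy(prompt: str, model_dir: str, max_length: int = 1024) -> str:
    """Greedy-decoding equivalent of near-zero-temperature sampling."""
    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    model = AutoModelForCausalLM.from_pretrained(model_dir)
    # tokenizer(...) returns both input_ids and attention_mask in one dict
    inputs = tokenizer(prompt, return_tensors="pt")
    output = model.generate(
        **inputs,
        max_length=max_length,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        do_sample=False,  # greedy: always pick the highest-probability next token
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)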