#!/usr/bin/env python3
"""Script to pre-download T5 models with extended timeout settings.

Run it once while online so that the T5-base tokenizer and model weights land
in the local Hugging Face cache; later runs of the training scripts can then
load them from that cache.
"""

import os
import sys
import time

# Status glyphs are written as named escapes so the source stays pure ASCII and
# cannot be corrupted again by a tool that mis-detects the file encoding.
_OK = "\N{WHITE HEAVY CHECK MARK}"
_PARTY = "\N{PARTY POPPER}"
_FAIL = "\N{CROSS MARK}"
_HINT = "\N{ELECTRIC LIGHT BULB}"
_WARN = "\N{WARNING SIGN}\N{VARIATION SELECTOR-16}"

# Timeout variables documented by huggingface_hub (both default to 10 seconds).
_HUB_TIMEOUT_VARS = ("HF_HUB_DOWNLOAD_TIMEOUT", "HF_HUB_ETAG_TIMEOUT")

# Names the earlier version of this script exported. They are not documented
# by huggingface_hub; they are still set in case other tooling reads them.
_LEGACY_TIMEOUT_VARS = ("HF_HUB_TIMEOUT", "REQUESTS_TIMEOUT")


def _configure_hub_timeouts(timeout_seconds: int) -> None:
    """Export *timeout_seconds* through every timeout environment variable."""
    value = str(timeout_seconds)
    for name in _HUB_TIMEOUT_VARS + _LEGACY_TIMEOUT_VARS:
        os.environ[name] = value


def _make_console_unicode_safe() -> None:
    """Keep the status glyphs from crashing ``print`` on non-UTF-8 consoles."""
    for stream in (sys.stdout, sys.stderr):
        # Replaced or captured streams may not offer reconfigure(); skip those.
        reconfigure = getattr(stream, "reconfigure", None)
        if reconfigure is not None:
            reconfigure(errors="replace")


def download_t5_model(timeout_seconds: int = 300) -> bool:
    """Download the T5-base tokenizer and model into the local cache.

    The timeout environment variables are set *before* ``transformers`` is
    imported, because ``huggingface_hub`` reads them when it is first
    imported. If that library was already imported elsewhere in the process,
    the new values may not take effect.

    Args:
        timeout_seconds: Seconds to wait for the Hub to respond, applied to
            both metadata (ETag) requests and file downloads. Defaults to
            300 (5 minutes).

    Returns:
        True when both the tokenizer and the model were fetched, False when
        ``transformers`` is missing or any download step raised.
    """
    _configure_hub_timeouts(timeout_seconds)

    try:
        # Deliberately imported here rather than at module top: see docstring.
        from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
    except ImportError as e:
        print(f"{_FAIL} Download failed: {e}")
        print("Install the 'transformers' package and try again.")
        return False

    print("Downloading T5-base model and tokenizer...")
    print("This may take several minutes depending on your connection...")

    try:
        # The returned objects are not needed; the calls are made only for
        # their side effect of populating the on-disk cache.
        print("Step 1/2: Downloading tokenizer...")
        AutoTokenizer.from_pretrained('t5-base')
        print(f"{_OK} Tokenizer downloaded successfully")

        print("Step 2/2: Downloading model...")
        AutoModelForSeq2SeqLM.from_pretrained('t5-base')
        print(f"{_OK} Model downloaded successfully")
    except Exception as e:
        # Top-level boundary of a CLI helper: report any failure and let the
        # caller decide what to do with the False result.
        print(f"{_FAIL} Download failed: {e}")
        print(f"\n{_HINT} Alternative solutions:")
        print("1. Try again with better internet connection")
        print("2. Use a VPN if there are regional restrictions")
        print("3. Download manually from: https://huggingface.co/t5-base")
        return False

    print(f"{_PARTY} All models downloaded and cached!")
    print("You can now run the training scripts offline.")
    return True


def main() -> int:
    """Run the download and return a process exit status (0 on success)."""
    _make_console_unicode_safe()

    if download_t5_model():
        print(f"\n{_OK} Ready for training! You can now run:")
        print(" powershell -ExecutionPolicy Bypass -File scripts/test_small_training.ps1")
        return 0

    print(f"\n{_WARN} Please fix connectivity and try again")
    return 1


if __name__ == "__main__":
    sys.exit(main())