|
|
|
""" |
|
Script to pre-download T5 models with extended timeout settings |
|
""" |
|
|
|
import os |
|
import time |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
|
|
def download_t5_model() -> bool:
    """Download the ``t5-base`` tokenizer and model with extended timeouts.

    Sets timeout-related environment variables to 300 seconds, then loads
    the tokenizer and the seq2seq model via ``from_pretrained`` so that a
    later run can reuse the downloaded files. Progress messages and, on
    failure, troubleshooting hints are printed to stdout.

    Returns:
        True if both the tokenizer and the model were loaded without error,
        False if any exception was raised along the way.
    """
    timeout_seconds = '300'

    # Timeout variables documented by huggingface_hub (both default to 10 s).
    # NOTE(review): huggingface_hub appears to read these when it is first
    # imported; since transformers is imported at module load, confirm they
    # take effect in-process, or export them before launching the script.
    os.environ['HF_HUB_DOWNLOAD_TIMEOUT'] = timeout_seconds
    os.environ['HF_HUB_ETAG_TIMEOUT'] = timeout_seconds

    # Names used by the original script; kept so anything that relied on
    # them being set continues to see the same values.
    os.environ['HF_HUB_TIMEOUT'] = timeout_seconds
    os.environ['REQUESTS_TIMEOUT'] = timeout_seconds

    print("Downloading T5-base model and tokenizer...")
    print("This may take several minutes depending on your connection...")

    # Emoji are written as escape sequences so the source stays pure ASCII
    # and cannot be corrupted by an encoding round-trip.
    try:
        print("Step 1/2: Downloading tokenizer...")
        # Return values are discarded: the call is made only for its
        # download side effect.
        AutoTokenizer.from_pretrained('t5-base')
        print("\u2705 Tokenizer downloaded successfully")  # check mark

        print("Step 2/2: Downloading model...")
        AutoModelForSeq2SeqLM.from_pretrained('t5-base')
        print("\u2705 Model downloaded successfully")  # check mark

        print("\U0001F389 All models downloaded and cached!")  # party popper
        print("You can now run the training scripts offline.")

        return True

    except Exception as e:
        # Script-level boundary: report any failure and let the caller
        # decide what to do based on the boolean result.
        print(f"\u274c Download failed: {e}")  # cross mark
        print("\n\U0001F4A1 Alternative solutions:")  # light bulb
        print("1. Try again with better internet connection")
        print("2. Use a VPN if there are regional restrictions")
        print("3. Download manually from: https://huggingface.co/t5-base")
        return False
|
|
|
if __name__ == "__main__":
    # Run the pre-download and tell the user what to do next.
    # Emoji are written as escape sequences so the source stays pure ASCII
    # and cannot be corrupted by an encoding round-trip.
    success = download_t5_model()
    if success:
        print("\n\u2705 Ready for training! You can now run:")  # check mark
        print(" powershell -ExecutionPolicy Bypass -File scripts/test_small_training.ps1")
    else:
        print("\n\u26a0\ufe0f Please fix connectivity and try again")  # warning sign