# File size: 4,441 Bytes
# 7812756
import os
import requests
import json
import time
from pprint import pprint
# Test internet connectivity
def check_internet(urls=None, timeout=10):
    """Probe a list of URLs and print the HTTP status and latency of each.

    Every URL is attempted independently, so one unreachable host does not
    prevent the remaining probes from running.

    Args:
        urls: Iterable of URLs to request. ``None`` (the default) selects the
            built-in list: the Hugging Face site, Google, and one Hugging
            Face model page.
        timeout: Value forwarded as ``timeout`` to ``requests.get``.

    Returns:
        None. Results are reported on stdout only.
    """
    print("\n=== TESTING INTERNET CONNECTIVITY ===")
    if urls is None:
        urls = (
            "https://huggingface.co",
            "https://google.com",
            "https://huggingface.co/allenai/longformer-base-4096",
        )

    for url in urls:
        print(f"Testing connection to {url}...")
        # perf_counter() is monotonic, so the measured interval cannot be
        # distorted by a wall-clock adjustment during the request.
        start_time = time.perf_counter()
        try:
            response = requests.get(url, timeout=timeout)
        except Exception as e:
            # Deliberately broad: this is a best-effort diagnostic and any
            # failure for one URL should be reported, not abort the run.
            print(f" Error: {e}")
            continue
        elapsed = time.perf_counter() - start_time
        print(f" Status: {response.status_code}, Time: {elapsed:.2f}s")
# Test model download
def test_model_download(model_name="distilbert-base-uncased",
                        cache_dir="/tmp/model_cache_test"):
    """Try to load a tokenizer through ``transformers`` and report the outcome.

    The tokenizer is requested with ``AutoTokenizer.from_pretrained`` using
    ``cache_dir`` as its cache directory; afterwards the number of entries in
    that directory (and up to five of their names) is printed.

    Args:
        model_name: Model identifier passed to ``from_pretrained``. The
            default is a small model chosen to keep the test quick.
        cache_dir: Directory created (if missing) and used as the cache.

    Returns:
        None. Success or failure is reported on stdout; any exception,
        including a missing ``transformers`` package, is caught and printed.
    """
    print("\n=== TESTING MODEL DOWNLOAD ===")
    try:
        # Imported lazily so that a missing `transformers` install is
        # reported as a failed test instead of breaking the whole script.
        from transformers import AutoTokenizer

        os.makedirs(cache_dir, exist_ok=True)
        print(f"Created test cache directory at {cache_dir}")

        print(f"Trying to download {model_name}...")
        # Monotonic timer: immune to wall-clock adjustments mid-download.
        start_time = time.perf_counter()
        tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
        elapsed = time.perf_counter() - start_time
        print(f"Successfully downloaded tokenizer in {elapsed:.2f}s")
        print(f"Tokenizer type: {type(tokenizer).__name__}")

        # The directory was created above, so it can be listed directly.
        files = os.listdir(cache_dir)
        print(f"Files in cache directory: {len(files)}")
        if files:
            print(f"Sample files: {files[:5]}")
    except Exception as e:
        # Deliberately broad: this is a diagnostic, every failure mode
        # (import, network, filesystem) should be printed, not raised.
        print(f"Model download test failed: {e}")
# Check disk space
def check_disk_space(directories=None):
    """Print total, used and free space for each existing directory.

    Args:
        directories: Iterable of paths to inspect. ``None`` (the default)
            checks ``/tmp``, ``/`` and ``/home``. Paths that do not exist
            are skipped silently.

    Returns:
        None. The report is written to stdout.
    """
    # Local import: shutil is not among the file's top-level imports.
    import shutil

    print("\n=== CHECKING DISK SPACE ===")
    if directories is None:
        directories = ("/tmp", "/", "/home")

    bytes_per_gb = 1024 ** 3
    for directory in directories:
        if not os.path.exists(directory):
            continue
        try:
            total, used, free = shutil.disk_usage(directory)
        except OSError as e:
            print(f" Error checking {directory}: {e}")
            continue

        print(f"Disk space for {directory}:")
        print(f" Total: {total / bytes_per_gb:.2f} GB")
        print(f" Used: {used / bytes_per_gb:.2f} GB")
        print(f" Free: {free / bytes_per_gb:.2f} GB")
        if total:
            print(f" Percent used: {(used / total) * 100:.1f}%")
        else:
            # A filesystem reporting zero total size would otherwise raise
            # ZeroDivisionError after half of the report was printed.
            print(" Percent used: n/a")
# Test GloVe model loading
def test_glove_loading(gensim_dir="/tmp/gensim-data",
                       model_name="glove-wiki-gigaword-100"):
    """Try to load a GloVe model through ``gensim.downloader`` and report it.

    Args:
        gensim_dir: Writable directory exported as ``GENSIM_DATA_DIR`` and
            created if missing.
        model_name: Dataset name passed to ``gensim.downloader.load``.

    Returns:
        None. Success or failure is reported on stdout; any exception,
        including a missing ``gensim`` package, is caught and printed.

    Side effects:
        Sets ``os.environ['GENSIM_DATA_DIR']`` for the rest of the process.
    """
    print("\n=== TESTING GLOVE MODEL LOADING ===")
    try:
        # The data directory must be configured BEFORE gensim.downloader is
        # imported: the module reads GENSIM_DATA_DIR when it is first loaded,
        # so setting the variable afterwards has no effect.
        # NOTE(review): if gensim.downloader was already imported earlier in
        # this process, the variable is presumably not re-read - confirm.
        os.environ['GENSIM_DATA_DIR'] = gensim_dir
        os.makedirs(gensim_dir, exist_ok=True)
        print(f"Set GENSIM_DATA_DIR to {gensim_dir}")

        # Imported lazily so that a missing `gensim` install is reported as
        # a failed test instead of breaking the whole script.
        import gensim.downloader

        print("Trying to download GloVe model (this might take a while)...")
        # Monotonic timer: immune to wall-clock adjustments mid-download.
        start_time = time.perf_counter()
        glove = gensim.downloader.load(model_name)
        elapsed = time.perf_counter() - start_time
        print(f"Successfully loaded GloVe model in {elapsed:.2f}s")
        print(f"GloVe model type: {type(glove).__name__}")
        print(f"Vocabulary size: {len(glove.key_to_index)}")

        # The directory was created above, so it can be listed directly.
        files = os.listdir(gensim_dir)
        print(f"Files in GloVe directory: {len(files)}")
        if files:
            print(f"Sample files: {files[:5]}")
    except Exception as e:
        # Deliberately broad: this is a diagnostic, every failure mode
        # (import, network, filesystem) should be printed, not raised.
        print(f"GloVe model loading test failed: {e}")
# Main function
if __name__ == "__main__":
    # Script entry point: run every diagnostic in a fixed order. Each check
    # reports its own outcome on stdout.
    print("Starting network and model download tests...")
    check_internet()
    test_model_download()
    test_glove_loading()
    # Disk space is reported last, i.e. after the download attempts above.
    check_disk_space()
    print("\nAll tests completed.")