|
import os |
|
import requests |
|
import json |
|
import time |
|
from pprint import pprint |
|
|
|
|
|
def check_internet(urls=None, timeout=10):
    """Request each URL over HTTP and print its status code and latency.

    Args:
        urls: Iterable of URLs to request. When ``None`` (the default), the
            built-in Hugging Face / Google endpoints are used.
        timeout: Timeout value forwarded to ``requests.get`` for every
            request.

    Returns:
        None. Results are printed. A URL that fails is reported and the
        remaining URLs are still tested.
    """
    print("\n=== TESTING INTERNET CONNECTIVITY ===")

    if urls is None:
        urls = (
            "https://huggingface.co",
            "https://google.com",
            "https://huggingface.co/allenai/longformer-base-4096",
        )

    for url in urls:
        try:
            print(f"Testing connection to {url}...")
            # perf_counter() is monotonic, so the measured latency cannot be
            # skewed by a system clock adjustment during the request.
            start_time = time.perf_counter()
            response = requests.get(url, timeout=timeout)
            elapsed = time.perf_counter() - start_time
            print(f" Status: {response.status_code}, Time: {elapsed:.2f}s")
        except Exception as e:
            # Deliberately broad: this is a best-effort diagnostic, and one
            # unreachable host must not stop the remaining probes.
            print(f" Error: {e}")
|
|
|
|
|
def test_model_download():
    """Fetch a small tokenizer through ``transformers`` into a scratch cache.

    Prints how long ``AutoTokenizer.from_pretrained`` took, the class of the
    returned tokenizer, and a sample of the cache directory's contents. Any
    exception (including a missing ``transformers`` package) is caught and
    printed instead of propagating.
    """
    print("\n=== TESTING MODEL DOWNLOAD ===")
    try:
        from transformers import AutoTokenizer

        # Dedicated cache directory for this test run.
        scratch_cache = "/tmp/model_cache_test"
        os.makedirs(scratch_cache, exist_ok=True)
        print(f"Created test cache directory at {scratch_cache}")

        checkpoint = "distilbert-base-uncased"
        print(f"Trying to download {checkpoint}...")

        began = time.time()
        tok = AutoTokenizer.from_pretrained(checkpoint, cache_dir=scratch_cache)
        took = time.time() - began

        print(f"Successfully downloaded tokenizer in {took:.2f}s")
        print(f"Tokenizer type: {type(tok).__name__}")

        # Nothing to list if the cache directory is gone.
        if not os.path.exists(scratch_cache):
            return

        entries = os.listdir(scratch_cache)
        print(f"Files in cache directory: {len(entries)}")
        if entries:
            print(f"Sample files: {entries[:5]}")
    except Exception as err:
        print(f"Model download test failed: {err!s}")
|
|
|
|
|
def check_disk_space(directories=None):
    """Print total, used and free disk space for each existing directory.

    Args:
        directories: Iterable of paths to inspect. When ``None`` (the
            default), ``/tmp``, ``/`` and ``/home`` are checked. Paths that
            do not exist are skipped silently.

    Returns:
        None. Results are printed. A directory whose usage cannot be read is
        reported and the remaining directories are still checked.
    """
    print("\n=== CHECKING DISK SPACE ===")
    try:
        import shutil

        if directories is None:
            directories = ["/tmp", "/", "/home"]

        bytes_per_gb = 1024 ** 3

        for directory in directories:
            if not os.path.exists(directory):
                continue

            try:
                total, used, free = shutil.disk_usage(directory)
            except OSError as e:
                print(f" Error checking {directory}: {e}")
                continue

            print(f"Disk space for {directory}:")
            print(f" Total: {total / bytes_per_gb:.2f} GB")
            print(f" Used: {used / bytes_per_gb:.2f} GB")
            print(f" Free: {free / bytes_per_gb:.2f} GB")
            if total:
                print(f" Percent used: {(used / total) * 100:.1f}%")
            else:
                # A filesystem may report a total size of 0; dividing by it
                # would raise ZeroDivisionError.
                print(" Percent used: n/a (filesystem reports zero total size)")
    except Exception as e:
        # Function-level boundary: keep the overall diagnostic run alive.
        print(f"Disk space check failed: {e}")
|
|
|
|
|
def test_glove_loading():
    """Download the GloVe vectors through gensim into a scratch directory.

    Sets ``GENSIM_DATA_DIR`` to ``/tmp/gensim-data``, loads
    ``glove-wiki-gigaword-100`` with ``gensim.downloader.load`` and prints the
    load time, the model's class name, its vocabulary size and a sample of
    the data directory's contents. Any exception (including a missing
    ``gensim`` package) is caught and printed instead of propagating.
    """
    print("\n=== TESTING GLOVE MODEL LOADING ===")
    try:
        gensim_dir = "/tmp/gensim-data"

        # GENSIM_DATA_DIR must be exported BEFORE gensim.downloader is
        # imported: the module resolves its data directory at import time, so
        # setting the variable afterwards leaves downloads in the default
        # location and the directory listing below would look empty.
        os.environ["GENSIM_DATA_DIR"] = gensim_dir
        os.makedirs(gensim_dir, exist_ok=True)
        print(f"Set GENSIM_DATA_DIR to {gensim_dir}")

        import gensim.downloader

        print("Trying to download GloVe model (this might take a while)...")
        # perf_counter() is monotonic, which suits interval measurement.
        start_time = time.perf_counter()
        glove = gensim.downloader.load("glove-wiki-gigaword-100")
        elapsed = time.perf_counter() - start_time

        print(f"Successfully loaded GloVe model in {elapsed:.2f}s")
        print(f"GloVe model type: {type(glove).__name__}")
        print(f"Vocabulary size: {len(glove.key_to_index)}")

        if os.path.exists(gensim_dir):
            files = os.listdir(gensim_dir)
            print(f"Files in GloVe directory: {len(files)}")
            if files:
                print(f"Sample files: {files[:5]}")
    except Exception as e:
        print(f"GloVe model loading test failed: {e}")
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the diagnostics one after another in a fixed
    # order, then print a closing line.
    print("Starting network and model download tests...")

    for diagnostic in (
        check_internet,
        test_model_download,
        test_glove_loading,
        check_disk_space,
    ):
        diagnostic()

    print("\nAll tests completed.")