"""Diagnostic script: checks connectivity, model downloads, GloVe loading and disk space.

Every check is best-effort: failures are printed rather than raised so that
the remaining checks still run.
"""

import os
import requests
import json
import time
from pprint import pprint


# Test internet connectivity
def check_internet() -> None:
    """Issue a GET request to a few well-known URLs and print status and latency.

    Each URL is probed independently; an error on one URL is printed and the
    loop moves on to the next.
    """
    print("\n=== TESTING INTERNET CONNECTIVITY ===")
    urls = [
        "https://huggingface.co",
        "https://google.com",
        "https://huggingface.co/allenai/longformer-base-4096",
    ]

    for url in urls:
        # Broad catch is deliberate: this is a diagnostic probe and any
        # failure (DNS, TLS, timeout, ...) should simply be reported.
        try:
            print(f"Testing connection to {url}...")
            start_time = time.time()
            response = requests.get(url, timeout=10)
            elapsed = time.time() - start_time
            print(f" Status: {response.status_code}, Time: {elapsed:.2f}s")
        except Exception as e:
            print(f" Error: {str(e)}")


# Test model download
def test_model_download() -> None:
    """Download a small tokenizer into a scratch cache directory and report on it.

    Prints the elapsed time, the tokenizer class name and a sample of the
    entries created in the cache directory. Any failure is printed.
    """
    print("\n=== TESTING MODEL DOWNLOAD ===")
    try:
        # Imported lazily so a missing `transformers` install is reported as
        # a failed check instead of breaking the whole script.
        from transformers import AutoTokenizer

        # Create cache directory
        cache_dir = "/tmp/model_cache_test"
        os.makedirs(cache_dir, exist_ok=True)
        print(f"Created test cache directory at {cache_dir}")

        # Try to download a model
        model_name = "distilbert-base-uncased"  # Smaller model for testing
        print(f"Trying to download {model_name}...")

        start_time = time.time()
        tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
        elapsed = time.time() - start_time

        print(f"Successfully downloaded tokenizer in {elapsed:.2f}s")
        print(f"Tokenizer type: {type(tokenizer).__name__}")

        # Check if files were created
        if os.path.exists(cache_dir):
            files = os.listdir(cache_dir)
            print(f"Files in cache directory: {len(files)}")
            if files:
                print(f"Sample files: {files[:5]}")
    except Exception as e:
        print(f"Model download test failed: {str(e)}")


# Check disk space
def check_disk_space() -> None:
    """Print total/used/free space (in GB) for a fixed set of directories.

    Directories that do not exist are skipped silently; an error while
    inspecting one directory is printed and the others are still checked.
    """
    print("\n=== CHECKING DISK SPACE ===")
    try:
        import shutil

        # Check disk space in various directories
        directories = ["/tmp", "/", "/home"]

        for directory in directories:
            if not os.path.exists(directory):
                continue
            try:
                total, used, free = shutil.disk_usage(directory)
                print(f"Disk space for {directory}:")
                print(f" Total: {total / (1024**3):.2f} GB")
                print(f" Used: {used / (1024**3):.2f} GB")
                print(f" Free: {free / (1024**3):.2f} GB")
                print(f" Percent used: {(used / total) * 100:.1f}%")
            except Exception as e:
                print(f" Error checking {directory}: {str(e)}")
    except Exception as e:
        print(f"Disk space check failed: {str(e)}")


# Test GloVe model loading
def test_glove_loading() -> None:
    """Download/load the GloVe vectors through gensim into a writable directory.

    Prints the load time, the model class name, the vocabulary size and a
    sample of the entries in the data directory. Any failure is printed.
    """
    print("\n=== TESTING GLOVE MODEL LOADING ===")
    try:
        # Set gensim data directory to a writable location.
        # This must happen BEFORE importing gensim.downloader: that module
        # reads GENSIM_DATA_DIR once, at import time, so setting the variable
        # afterwards has no effect on where the data is stored.
        gensim_dir = "/tmp/gensim-data"
        os.environ['GENSIM_DATA_DIR'] = gensim_dir
        os.makedirs(gensim_dir, exist_ok=True)
        print(f"Set GENSIM_DATA_DIR to {gensim_dir}")

        # NOTE(review): if gensim.downloader was already imported earlier in
        # the same process, the directory chosen at that first import wins.
        import gensim.downloader

        # Try to download GloVe
        print("Trying to download GloVe model (this might take a while)...")
        start_time = time.time()
        glove = gensim.downloader.load('glove-wiki-gigaword-100')
        elapsed = time.time() - start_time

        print(f"Successfully loaded GloVe model in {elapsed:.2f}s")
        print(f"GloVe model type: {type(glove).__name__}")
        print(f"Vocabulary size: {len(glove.key_to_index)}")

        # Check if files were created
        if os.path.exists(gensim_dir):
            files = os.listdir(gensim_dir)
            print(f"Files in GloVe directory: {len(files)}")
            if files:
                print(f"Sample files: {files[:5]}")
    except Exception as e:
        print(f"GloVe model loading test failed: {str(e)}")


# Main function
def main() -> None:
    """Run every diagnostic check in sequence."""
    print("Starting network and model download tests...")

    # Run tests
    check_internet()
    test_model_download()
    test_glove_loading()
    # Disk space is checked last so the report reflects usage after the downloads.
    check_disk_space()

    print("\nAll tests completed.")


if __name__ == "__main__":
    main()