# File size: 4,441 Bytes
# 7812756
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import os
import requests
import json
import time
from pprint import pprint

# Test internet connectivity
def check_internet():
    """Probe a fixed list of URLs and print each one's HTTP status and latency.

    Each URL is fetched with a 10-second timeout.  A failure on one URL is
    printed and the remaining URLs are still probed.  Nothing is returned;
    all results are written to stdout.
    """
    print("\n=== TESTING INTERNET CONNECTIVITY ===")
    try:
        targets = (
            "https://huggingface.co",
            "https://google.com",
            "https://huggingface.co/allenai/longformer-base-4096",
        )

        for target in targets:
            try:
                print(f"Testing connection to {target}...")
                began = time.time()
                reply = requests.get(target, timeout=10)
                duration = time.time() - began
                print(f"  Status: {reply.status_code}, Time: {duration:.2f}s")
            except Exception as probe_error:
                # Report the failure and move on to the next URL.
                print(f"  Error: {probe_error!s}")
    except Exception as outer_error:
        # Safety net for anything that escapes the per-URL handler above.
        print(f"Network test failed: {outer_error!s}")

# Test model download
def test_model_download():
    """Download a small tokenizer into a scratch cache and report the outcome.

    Fetches the ``distilbert-base-uncased`` tokenizer (chosen because it is a
    smaller model) via ``transformers.AutoTokenizer`` into
    ``/tmp/model_cache_test``, prints how long the download took and the
    tokenizer class name, then lists up to five entries of the cache
    directory.  Any exception, including a missing ``transformers`` package,
    is caught and printed.  Nothing is returned.
    """
    print("\n=== TESTING MODEL DOWNLOAD ===")
    try:
        from transformers import AutoTokenizer

        # Scratch location used as the download cache for this test.
        scratch_cache = "/tmp/model_cache_test"
        os.makedirs(scratch_cache, exist_ok=True)
        print(f"Created test cache directory at {scratch_cache}")

        checkpoint = "distilbert-base-uncased"
        print(f"Trying to download {checkpoint}...")

        began = time.time()
        tok = AutoTokenizer.from_pretrained(checkpoint, cache_dir=scratch_cache)
        duration = time.time() - began

        print(f"Successfully downloaded tokenizer in {duration:.2f}s")
        print(f"Tokenizer type: {type(tok).__name__}")

        # Show what, if anything, ended up in the cache directory.
        if not os.path.exists(scratch_cache):
            return
        entries = os.listdir(scratch_cache)
        print(f"Files in cache directory: {len(entries)}")
        if entries:
            print(f"Sample files: {entries[:5]}")
    except Exception as failure:
        print(f"Model download test failed: {failure!s}")

# Check disk space
def check_disk_space():
    """Print total, used and free space for ``/tmp``, ``/`` and ``/home``.

    Directories that do not exist are skipped silently.  Sizes are printed in
    GB computed as bytes / 1024**3, followed by the percentage of space used.
    An error on one directory is printed and the remaining directories are
    still checked.  Nothing is returned.
    """
    print("\n=== CHECKING DISK SPACE ===")
    try:
        import shutil

        bytes_per_gb = 1024 ** 3

        for mount in ("/tmp", "/", "/home"):
            if not os.path.exists(mount):
                continue
            try:
                usage = shutil.disk_usage(mount)
                print(f"Disk space for {mount}:")
                print(f"  Total: {usage.total / bytes_per_gb:.2f} GB")
                print(f"  Used: {usage.used / bytes_per_gb:.2f} GB")
                print(f"  Free: {usage.free / bytes_per_gb:.2f} GB")
                print(f"  Percent used: {(usage.used / usage.total) * 100:.1f}%")
            except Exception as mount_error:
                print(f"  Error checking {mount}: {mount_error!s}")
    except Exception as failure:
        print(f"Disk space check failed: {failure!s}")

# Test GloVe model loading
def test_glove_loading():
    """Download the GloVe vectors through gensim into a writable directory.

    Points gensim's data directory at ``/tmp/gensim-data``, loads the
    ``glove-wiki-gigaword-100`` model with ``gensim.downloader.load``, prints
    the load time, the model type and the vocabulary size, then lists up to
    five entries of the data directory.  Any exception, including a missing
    ``gensim`` package, is caught and printed.  Nothing is returned.
    """
    print("\n=== TESTING GLOVE MODEL LOADING ===")
    try:
        # Configure the data directory BEFORE importing gensim.downloader:
        # that module reads GENSIM_DATA_DIR once, at import time, so setting
        # the variable after the import leaves gensim using ~/gensim-data.
        gensim_dir = "/tmp/gensim-data"
        os.environ['GENSIM_DATA_DIR'] = gensim_dir
        os.makedirs(gensim_dir, exist_ok=True)
        print(f"Set GENSIM_DATA_DIR to {gensim_dir}")

        import gensim.downloader

        # If gensim.downloader was already imported earlier in this process
        # the environment variable is not re-read, so override the module's
        # base directory explicitly as well.
        gensim.downloader.BASE_DIR = gensim_dir

        # Try to download GloVe
        print("Trying to download GloVe model (this might take a while)...")
        start_time = time.time()
        glove = gensim.downloader.load('glove-wiki-gigaword-100')
        elapsed = time.time() - start_time

        print(f"Successfully loaded GloVe model in {elapsed:.2f}s")
        print(f"GloVe model type: {type(glove).__name__}")
        print(f"Vocabulary size: {len(glove.key_to_index)}")

        # Check if files were created
        if os.path.exists(gensim_dir):
            files = os.listdir(gensim_dir)
            print(f"Files in GloVe directory: {len(files)}")
            if files:
                print(f"Sample files: {files[:5]}")
    except Exception as e:
        print(f"GloVe model loading test failed: {str(e)}")

# Main function
if __name__ == "__main__":
    print("Starting network and model download tests...")

    # Run each diagnostic in turn: connectivity first, then the two
    # download tests, and finally the disk-space report.
    for run_check in (
        check_internet,
        test_model_download,
        test_glove_loading,
        check_disk_space,
    ):
        run_check()

    print("\nAll tests completed.")