#!/usr/bin/env python3
"""Test the saved compressed models"""
import torch
import torch.nn as nn
import os

print("="*70)
print(" "*10 + "TESTING SAVED COMPRESSED MODELS")
print("="*70)

# Test MLP model
print("\n1. Testing MLP models:")
print("-"*40)

# Load original
original_mlp = torch.load("compressed_models/mlp_original_fp32.pth")
print(f"✅ Loaded original MLP: {os.path.getsize('compressed_models/mlp_original_fp32.pth')/1024:.1f} KB")

# Load compressed
compressed_mlp = torch.load("compressed_models/mlp_compressed_int8.pth")
print(f"✅ Loaded compressed MLP: {os.path.getsize('compressed_models/mlp_compressed_int8.pth')/1024:.1f} KB")

# Recreate the model architecture and test it; a small helper avoids
# duplicating the layer stack for the quantized copy below
def build_mlp():
    return nn.Sequential(
        nn.Linear(784, 256),
        nn.ReLU(),
        nn.Linear(256, 128),
        nn.ReLU(),
        nn.Linear(128, 10),
    )

model = build_mlp()
model.load_state_dict(original_mlp['model_state_dict'])

# Test inference
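# 784 = 28*28, i.e. one flattened MNIST-sized image; random data only
# exercises the forward pass, it says nothing about accuracy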
test_input = torch.randn(1, 784)
with torch.no_grad():
    output = model(test_input)
    print(f"   Original output shape: {output.shape}")
    print(f"   Prediction: {torch.argmax(output).item()}")

# For the int8 model, recreate the same architecture and apply dynamic
# quantization before loading the saved quantized state dict
model_quant = build_mlp()
model_quant.eval()
model_quant = torch.ao.quantization.quantize_dynamic(
    model_quant, {nn.Linear}, dtype=torch.qint8
)
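# quantize_dynamic swaps each nn.Linear for a dynamically quantized Linear
# while keeping module names, so the keys of the saved int8 state dict line
# up with this model's state dict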
model_quant.load_state_dict(compressed_mlp['model_state_dict'])

with torch.no_grad():
    output_quant = model_quant(test_input)
    print(f"   Compressed output shape: {output_quant.shape}")
    print(f"   Prediction: {torch.argmax(output_quant).item()}")

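# Optional sanity check (assumes the int8 checkpoint was produced by
# quantizing the same weights as the FP32 checkpoint): dynamic quantization
# should only perturb the logits slightly, so the predictions usually agree
max_diff = (output - output_quant).abs().max().item()
print(f"   Max |FP32 - INT8| logit difference: {max_diff:.4f}")
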
print("\n✅ Both models work and produce valid outputs!")