File size: 4,107 Bytes
7500f3e 7442188 7500f3e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
#!/usr/bin/env python3
"""
Inference script for samples autoencoder model
Generated automatically during training
"""
import torch
import pandas as pd
import numpy as np
import json
import argparse
import os
def load_model_and_config(model_dir):
    """Rebuild the saved autoencoder and return it with its configuration.

    Reads ``model_config.json`` from *model_dir*, instantiates the network
    class selected by ``model_info.model_type`` and restores its weights
    from the file named by ``model_info.saved_model_file``.

    Args:
        model_dir: Directory holding ``model_config.json`` and the saved
            weights file.

    Returns:
        A ``(model, config)`` tuple: the model (weights mapped to CPU and
        switched to eval mode) and the parsed configuration dict.
    """
    with open(os.path.join(model_dir, 'model_config.json'), 'r') as handle:
        config = json.load(handle)

    info = config['model_info']
    weights_path = os.path.join(model_dir, info['saved_model_file'])

    # The model classes are imported at call time rather than at module
    # import time.
    from compress_data_unified import SimpleAE, AE

    # All four settings are read up front, so a config lacking any of them
    # fails here regardless of which branch below is taken.
    latent_dims = info['latent_dims']
    input_dim = info['input_dim']
    layer_sizes = info['layer_sizes']
    model_type = info['model_type']

    if model_type != 'SimpleAE':
        # Standard AE
        model = AE(layer_sizes, use_simple=False)
    elif isinstance(layer_sizes, list) and len(layer_sizes) > 1:
        # SimpleAE wrapped in the AE class
        model = AE(layer_sizes, use_simple=True)
    else:
        # Direct SimpleAE
        model = SimpleAE(input_dim, latent_dims)

    # NOTE(review): torch.load may unpickle arbitrary objects depending on
    # the torch version / weights_only setting -- load trusted files only.
    state = torch.load(weights_path, map_location='cpu')
    model.load_state_dict(state)
    model.eval()
    return model, config
def preprocess_data(data, config):
    """Min-max scale *data* into roughly the [-1, 1] interval.

    The minimum and maximum are taken over all of *data*, ignoring NaNs.
    If the value range is smaller than a tiny epsilon the input is handed
    back untouched, which avoids dividing by (almost) zero.

    Args:
        data: Array-like numeric container (anything ``np.nanmin`` and
            arithmetic broadcasting accept).
        config: Accepted for interface compatibility; not read here.

    Returns:
        The scaled data, or *data* itself when its range is degenerate.

    NOTE(review): the scaling statistics come from *data* itself, not from
    *config* -- confirm this matches what training did.
    """
    tiny = 1e-8
    lowest = np.nanmin(data)
    highest = np.nanmax(data)
    spread = highest - lowest

    # Degenerate (near-constant) input: nothing sensible to scale.
    if spread < tiny:
        return data

    shifted = data - lowest
    return 2 * shifted / (spread + tiny) - 1
def run_inference(model_dir, input_data_path, output_path=None):
    """Encode and reconstruct the rows of a CSV file with a saved model.

    Loads the model via ``load_model_and_config``, reads the CSV (first
    column used as the index), scales it with ``preprocess_data``, runs
    ``model.encode`` followed by ``model.decode`` without gradient
    tracking, and writes both results as CSV files.

    Args:
        model_dir: Directory containing the trained model and its config.
        input_data_path: Path of the input CSV file.
        output_path: Directory for the result files; ``None`` selects
            ``'inference_results'``. Created if missing.

    Returns:
        A ``(latent_df, reconstructed_df)`` tuple of DataFrames, both
        indexed like the input; the reconstruction also reuses the input's
        column labels.
    """
    model, config = load_model_and_config(model_dir)

    # Load, scale and tensorize the input table.
    frame = pd.read_csv(input_data_path, index_col=0)
    scaled = preprocess_data(frame, config)
    batch = torch.FloatTensor(scaled.values)

    # Forward pass: input -> latent space -> reconstruction.
    with torch.no_grad():
        codes = model.encode(batch)
        rebuilt = model.decode(codes)

    # Wrap the raw outputs in labelled DataFrames.
    code_values = codes.numpy()
    latent_width = config['model_info']['latent_dims']
    latent_names = [f'latent_{idx + 1}' for idx in range(latent_width)]
    latent_df = pd.DataFrame(code_values, index=frame.index, columns=latent_names)
    reconstructed_df = pd.DataFrame(
        rebuilt.numpy(), index=frame.index, columns=frame.columns
    )

    # Persist both tables under the chosen output directory.
    out_dir = 'inference_results' if output_path is None else output_path
    os.makedirs(out_dir, exist_ok=True)
    latent_file = os.path.join(out_dir, 'latent_representation.csv')
    reconstructed_file = os.path.join(out_dir, 'reconstructed_data.csv')
    latent_df.to_csv(latent_file)
    reconstructed_df.to_csv(reconstructed_file)

    print("Inference completed:")
    print(f" Latent representation saved: {latent_file}")
    print(f" Reconstructed data saved: {reconstructed_file}")
    return latent_df, reconstructed_df
if __name__ == "__main__":
    # Command-line entry point: parse the three path options, run the
    # inference pipeline and report the shapes of the resulting tables.
    cli = argparse.ArgumentParser(
        description='Run inference with trained autoencoder'
    )

    # Every option is a string; only the per-option settings differ.
    option_table = (
        ('--model_dir', {
            'required': True,
            'help': 'Directory containing trained model and config',
        }),
        ('--input_data', {
            'required': True,
            'help': 'Path to input data CSV file',
        }),
        ('--output_dir', {
            'default': 'inference_results',
            'help': 'Output directory for results',
        }),
    )
    for flag, settings in option_table:
        cli.add_argument(flag, type=str, **settings)

    options = cli.parse_args()
    latent_frame, reconstructed_frame = run_inference(
        options.model_dir, options.input_data, options.output_dir
    )
    print(f"Latent dimensions: {latent_frame.shape}")
    print(f"Reconstructed dimensions: {reconstructed_frame.shape}")
|