ae_tracerx_2d / run_inference.py
jruffle's picture
Upload run_inference.py with huggingface_hub
7442188 verified
#!/usr/bin/env python3
"""
Inference script for samples autoencoder model
Generated automatically during training
"""
import torch
import pandas as pd
import numpy as np
import json
import argparse
import os
def load_model_and_config(model_dir):
    """Rebuild the trained autoencoder stored in ``model_dir``.

    Reads ``model_config.json`` from ``model_dir``, instantiates the
    architecture described by its ``model_info`` section, loads the saved
    state dict with ``map_location='cpu'`` and switches the model to
    evaluation mode.

    Args:
        model_dir: Directory containing ``model_config.json`` and the
            weights file named by ``model_info.saved_model_file``.

    Returns:
        A ``(model, config)`` tuple: the model (in eval mode) and the parsed
        configuration dictionary.
    """
    with open(os.path.join(model_dir, 'model_config.json'), 'r') as handle:
        config = json.load(handle)

    info = config['model_info']
    weights_path = os.path.join(model_dir, info['saved_model_file'])

    # The architecture classes come from the training module; it is imported
    # here, inside the function, so it is only required when a model is
    # actually being loaded.
    from compress_data_unified import SimpleAE, AE

    latent_dims = info['latent_dims']
    input_dim = info['input_dim']
    layer_sizes = info['layer_sizes']
    model_type = info['model_type']

    if model_type != 'SimpleAE':
        # Anything that is not a SimpleAE is built as the standard AE.
        model = AE(layer_sizes, use_simple=False)
    elif isinstance(layer_sizes, list) and len(layer_sizes) > 1:
        # A multi-entry layer list is taken to mean the SimpleAE was wrapped
        # in the AE class.
        model = AE(layer_sizes, use_simple=True)
    else:
        # Otherwise the SimpleAE is constructed directly.
        model = SimpleAE(input_dim, latent_dims)

    # NOTE: torch.load unpickles the checkpoint file, so only point
    # ``model_dir`` at checkpoints from a trusted source.
    state_dict = torch.load(weights_path, map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()
    return model, config
def preprocess_data(data, config):
    """Rescale ``data`` linearly into the [-1, 1] range.

    The bounds are the NaN-ignoring global minimum and maximum of ``data``
    itself. When the spread between them is below ``1e-8`` (effectively
    constant input) the data is returned unchanged.

    NOTE(review): the bounds come from the data passed in, not from stored
    training statistics, and ``config`` is not consulted -- confirm this
    matches what the training pipeline did.

    Args:
        data: Array-like / DataFrame of numeric values.
        config: Model configuration; accepted for interface compatibility
            but currently unused.

    Returns:
        The rescaled data, or ``data`` itself if its value range is
        degenerate.
    """
    tiny = 1e-8  # guards the division and defines "degenerate range"
    lowest = np.nanmin(data)
    highest = np.nanmax(data)
    spread = highest - lowest

    # Nothing to scale when every (non-NaN) value is effectively identical.
    if spread < tiny:
        return data

    shifted = data - lowest
    scaled = 2 * shifted / (spread + tiny)
    return scaled - 1
def run_inference(model_dir, input_data_path, output_path=None):
    """Encode and reconstruct a CSV dataset with a trained autoencoder.

    Loads the model from ``model_dir``, reads ``input_data_path`` (first
    column used as the row index), applies ``preprocess_data`` and runs the
    encoder followed by the decoder without gradient tracking. Both results
    are written as CSV files into ``output_path``.

    The reconstruction is saved exactly as the decoder produced it; no
    inverse of the preprocessing is applied here.

    Args:
        model_dir: Directory containing the trained model and its config.
        input_data_path: Path to the input CSV file.
        output_path: Directory for the result files; ``None`` falls back to
            ``'inference_results'``. Created if it does not exist.

    Returns:
        A ``(latent_df, reconstructed_df)`` tuple of DataFrames indexed like
        the input data.
    """
    model, config = load_model_and_config(model_dir)

    # Read the samples and bring them into the model's input scaling.
    data = pd.read_csv(input_data_path, index_col=0)
    scaled = preprocess_data(data, config)
    inputs = torch.FloatTensor(scaled.values)

    # Forward pass only: encode to the latent space, then decode again.
    with torch.no_grad():
        latent = model.encode(inputs)
        reconstructed = model.decode(latent)

    # Latent columns are named latent_1 .. latent_N, with N from the config.
    n_latent = config['model_info']['latent_dims']
    latent_columns = [f'latent_{k + 1}' for k in range(n_latent)]
    latent_df = pd.DataFrame(latent.numpy(), index=data.index, columns=latent_columns)
    reconstructed_df = pd.DataFrame(
        reconstructed.numpy(),
        index=data.index,
        columns=data.columns,
    )

    # Persist both tables next to each other in the output directory.
    output_path = 'inference_results' if output_path is None else output_path
    os.makedirs(output_path, exist_ok=True)
    latent_csv = os.path.join(output_path, 'latent_representation.csv')
    reconstructed_csv = os.path.join(output_path, 'reconstructed_data.csv')
    latent_df.to_csv(latent_csv)
    reconstructed_df.to_csv(reconstructed_csv)

    print("Inference completed:")
    print(f" Latent representation saved: {latent_csv}")
    print(f" Reconstructed data saved: {reconstructed_csv}")
    return latent_df, reconstructed_df
if __name__ == "__main__":
    # Command-line entry point: parse the three path options, run inference
    # and report the shapes of the two resulting tables.
    cli = argparse.ArgumentParser(
        description='Run inference with trained autoencoder',
    )
    cli.add_argument(
        '--model_dir',
        type=str,
        required=True,
        help='Directory containing trained model and config',
    )
    cli.add_argument(
        '--input_data',
        type=str,
        required=True,
        help='Path to input data CSV file',
    )
    cli.add_argument(
        '--output_dir',
        type=str,
        default='inference_results',
        help='Output directory for results',
    )
    options = cli.parse_args()

    latent, reconstructed = run_inference(
        options.model_dir,
        options.input_data,
        options.output_dir,
    )
    print(f"Latent dimensions: {latent.shape}")
    print(f"Reconstructed dimensions: {reconstructed.shape}")