#!/usr/bin/env python3
"""Inference script for samples autoencoder model.

Generated automatically during training.
"""
import torch
import pandas as pd
import numpy as np
import json
import argparse
import os


def load_model_and_config(model_dir):
    """Load the trained model and its configuration.

    Args:
        model_dir: Directory containing 'model_config.json' and the saved
            state-dict file named inside that config.

    Returns:
        Tuple of (model set to eval mode, parsed config dict).
    """
    config_path = os.path.join(model_dir, 'model_config.json')
    with open(config_path, 'r') as f:
        config = json.load(f)

    # Locate the saved weights referenced by the config.
    info = config['model_info']
    model_path = os.path.join(model_dir, info['saved_model_file'])

    # Local import: keeps this script importable even when the training
    # module is not on the path until inference is actually run.
    from compress_data_unified import SimpleAE, AE

    latent_dims = info['latent_dims']
    input_dim = info['input_dim']
    layer_sizes = info['layer_sizes']
    model_type = info['model_type']

    # Reconstruct the architecture exactly as it was trained.
    if model_type == 'SimpleAE':
        if isinstance(layer_sizes, list) and len(layer_sizes) > 1:
            # SimpleAE wrapped in the AE container class.
            model = AE(layer_sizes, use_simple=True)
        else:
            # Direct SimpleAE.
            model = SimpleAE(input_dim, latent_dims)
    else:
        # Standard AE.
        model = AE(layer_sizes, use_simple=False)

    # NOTE(review): torch.load unpickles arbitrary objects — only load
    # checkpoints from trusted sources (pass weights_only=True if the
    # installed torch version supports it).
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    model.eval()
    return model, config


def preprocess_data(data, config):
    """Normalize data to the [-1, 1] range, mirroring training preprocessing.

    NOTE(review): this rescales using the *inference* data's own min/max
    rather than statistics saved from training — confirm the training
    pipeline used the same per-dataset scaling, otherwise latent codes
    are not comparable across datasets.

    Args:
        data: DataFrame or ndarray of raw feature values (may contain NaN).
        config: Model config dict (currently unused; kept for interface
            stability).

    Returns:
        Normalized data of the same type/shape. Returned unchanged when the
        value range is (near-)constant, to avoid dividing by ~zero.
    """
    eps = 1e-8
    min_val = np.nanmin(data)
    max_val = np.nanmax(data)
    if max_val - min_val < eps:
        # Constant input: nothing meaningful to scale.
        return data
    return 2 * (data - min_val) / (max_val - min_val + eps) - 1


def run_inference(model_dir, input_data_path, output_path=None):
    """Encode and reconstruct a CSV of samples with a trained autoencoder.

    Args:
        model_dir: Directory with the trained model and its config.
        input_data_path: Path to the input CSV (first column is the index).
        output_path: Output directory; defaults to 'inference_results'.

    Returns:
        Tuple of (latent_df, reconstructed_df), both indexed like the input.
    """
    model, config = load_model_and_config(model_dir)

    # Load and preprocess data with the same normalization as training.
    data = pd.read_csv(input_data_path, index_col=0)
    data_processed = preprocess_data(data, config)
    data_tensor = torch.FloatTensor(data_processed.values)

    # Inference: encode to latent space, then decode back.
    with torch.no_grad():
        latent = model.encode(data_tensor)
        reconstructed = model.decode(latent)

    # Derive the latent column count from the actual tensor shape rather
    # than the config, so a stale/mismatched config cannot mislabel output.
    n_latent = latent.shape[1]
    latent_df = pd.DataFrame(
        latent.numpy(),
        index=data.index,
        columns=[f'latent_{i+1}' for i in range(n_latent)],
    )
    reconstructed_df = pd.DataFrame(
        reconstructed.numpy(), index=data.index, columns=data.columns
    )

    # Save results.
    if output_path is None:
        output_path = 'inference_results'
    os.makedirs(output_path, exist_ok=True)
    latent_file = os.path.join(output_path, 'latent_representation.csv')
    reconstructed_file = os.path.join(output_path, 'reconstructed_data.csv')
    latent_df.to_csv(latent_file)
    reconstructed_df.to_csv(reconstructed_file)

    print("Inference completed:")
    print(f"  Latent representation saved: {latent_file}")
    print(f"  Reconstructed data saved: {reconstructed_file}")
    return latent_df, reconstructed_df


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Run inference with trained autoencoder')
    parser.add_argument('--model_dir', type=str, required=True,
                        help='Directory containing trained model and config')
    parser.add_argument('--input_data', type=str, required=True,
                        help='Path to input data CSV file')
    parser.add_argument('--output_dir', type=str, default='inference_results',
                        help='Output directory for results')
    args = parser.parse_args()

    latent, reconstructed = run_inference(
        args.model_dir, args.input_data, args.output_dir)
    print(f"Latent dimensions: {latent.shape}")
    print(f"Reconstructed dimensions: {reconstructed.shape}")