|
|
|
""" |
|
Inference script for the samples autoencoder model.

Generated automatically during training.
|
""" |
|
|
|
import torch |
|
import pandas as pd |
|
import numpy as np |
|
import json |
|
import argparse |
|
import os |
|
|
|
def load_model_and_config(model_dir):
    """Rebuild the saved autoencoder found in ``model_dir``.

    Reads ``model_config.json`` from ``model_dir``, instantiates the
    architecture described by its ``model_info`` section, loads the saved
    state dict onto the CPU and puts the model in eval mode.

    Args:
        model_dir: Directory containing ``model_config.json`` and the
            weights file named by ``model_info['saved_model_file']``.

    Returns:
        A ``(model, config)`` tuple: the model in eval mode and the parsed
        configuration dictionary.
    """
    with open(os.path.join(model_dir, 'model_config.json'), 'r') as config_file:
        config = json.load(config_file)

    info = config['model_info']
    weights_path = os.path.join(model_dir, info['saved_model_file'])

    # The model classes come from compress_data_unified; the import is kept
    # inside the function so it only runs when a model is actually loaded.
    from compress_data_unified import SimpleAE, AE

    latent_dims = info['latent_dims']
    input_dim = info['input_dim']
    layer_sizes = info['layer_sizes']
    model_type = info['model_type']

    if model_type != 'SimpleAE':
        # Any other model type is built as the non-simple AE.
        model = AE(layer_sizes, use_simple=False)
    elif isinstance(layer_sizes, list) and len(layer_sizes) > 1:
        # A 'SimpleAE' with more than one layer size is built through AE.
        model = AE(layer_sizes, use_simple=True)
    else:
        model = SimpleAE(input_dim, latent_dims)

    # NOTE(review): depending on the installed torch version's
    # `weights_only` default, torch.load may unpickle arbitrary objects --
    # only point this at checkpoints from a trusted source.
    state_dict = torch.load(weights_path, map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()

    return model, config
|
|
|
def preprocess_data(data, config):
    """Rescale ``data`` to approximately [-1, 1] using its global min/max.

    The minimum and maximum are taken over all values with NaNs ignored.
    When the value range is smaller than 1e-8 the input is returned
    unchanged. Intended to mirror the training-time preprocessing (that
    cannot be confirmed from this file).

    Args:
        data: Numeric array-like (e.g. NumPy array or DataFrame).
        config: Accepted for interface compatibility; not used here.

    Returns:
        The rescaled data, or ``data`` itself if it is (nearly) constant.
    """
    tolerance = 1e-8
    lowest = np.nanmin(data)
    span = np.nanmax(data) - lowest

    # (Nearly) constant input: scaling would divide by ~0, so pass through.
    if span < tolerance:
        return data

    # Map [lowest, highest] onto [-1, 1]; the tolerance keeps the divisor
    # strictly positive.
    return 2 * (data - lowest) / (span + tolerance) - 1
|
|
|
def run_inference(model_dir, input_data_path, output_path=None):
    """Encode and reconstruct a CSV dataset with the saved autoencoder.

    Loads the model from ``model_dir``, reads ``input_data_path`` (first
    column used as the row index), rescales it with ``preprocess_data``,
    runs the encoder and decoder without gradient tracking, and writes
    ``latent_representation.csv`` and ``reconstructed_data.csv`` into the
    output directory. The reconstruction is written exactly as the decoder
    returns it; no inverse scaling is applied here.

    Args:
        model_dir: Directory with the trained model and its config.
        input_data_path: Path to the input CSV file.
        output_path: Output directory; ``'inference_results'`` when None.

    Returns:
        ``(latent_df, reconstructed_df)`` DataFrames indexed like the input.
    """
    model, config = load_model_and_config(model_dir)

    raw_frame = pd.read_csv(input_data_path, index_col=0)
    scaled_frame = preprocess_data(raw_frame, config)
    batch = torch.FloatTensor(scaled_frame.values)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        latent = model.encode(batch)
        reconstructed = model.decode(latent)

    latent_values = latent.numpy()
    n_latent = config['model_info']['latent_dims']
    # Latent columns are numbered from 1: latent_1 .. latent_<n_latent>.
    latent_columns = [f'latent_{k}' for k in range(1, n_latent + 1)]
    latent_df = pd.DataFrame(
        latent_values,
        index=raw_frame.index,
        columns=latent_columns,
    )
    reconstructed_df = pd.DataFrame(
        reconstructed.numpy(),
        index=raw_frame.index,
        columns=raw_frame.columns,
    )

    target_dir = 'inference_results' if output_path is None else output_path
    os.makedirs(target_dir, exist_ok=True)

    latent_csv = os.path.join(target_dir, 'latent_representation.csv')
    reconstructed_csv = os.path.join(target_dir, 'reconstructed_data.csv')
    latent_df.to_csv(latent_csv)
    reconstructed_df.to_csv(reconstructed_csv)

    print("Inference completed:")
    print(f" Latent representation saved: {latent_csv}")
    print(f" Reconstructed data saved: {reconstructed_csv}")

    return latent_df, reconstructed_df
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the three options, run inference and
    # report the shapes of the resulting tables.
    cli = argparse.ArgumentParser(description='Run inference with trained autoencoder')

    # (flag, extra keyword arguments) in the order they are registered.
    argument_specs = (
        ('--model_dir', {'required': True,
                         'help': 'Directory containing trained model and config'}),
        ('--input_data', {'required': True,
                          'help': 'Path to input data CSV file'}),
        ('--output_dir', {'default': 'inference_results',
                          'help': 'Output directory for results'}),
    )
    for flag, extra in argument_specs:
        cli.add_argument(flag, type=str, **extra)

    cli_args = cli.parse_args()

    latent, reconstructed = run_inference(
        cli_args.model_dir,
        cli_args.input_data,
        cli_args.output_dir,
    )
    print(f"Latent dimensions: {latent.shape}")
    print(f"Reconstructed dimensions: {reconstructed.shape}")
|
|