| | |
"""
Inference script for transcriptome autoencoder model
Generated automatically during training
"""
| |
|
| | import torch |
| | import pandas as pd |
| | import numpy as np |
| | import json |
| | import argparse |
| | import os |
| |
|
def load_model_and_config(model_dir):
    """Restore a trained autoencoder together with its saved configuration.

    Reads ``model_config.json`` from *model_dir*, rebuilds the architecture
    selected by the ``model_info`` section, loads the saved weights onto the
    CPU and switches the model to evaluation mode.

    Args:
        model_dir: Directory containing ``model_config.json`` and the weights
            file named by ``model_info['saved_model_file']``.

    Returns:
        A ``(model, config)`` tuple: the model in eval mode and the parsed
        configuration dictionary.

    Raises:
        KeyError: If ``model_info`` or one of its required entries
            (``saved_model_file``, ``latent_dims``, ``input_dim``,
            ``layer_sizes``, ``model_type``) is missing from the config.
    """
    with open(os.path.join(model_dir, 'model_config.json'), 'r') as handle:
        config = json.load(handle)

    info = config['model_info']
    weights_path = os.path.join(model_dir, info['saved_model_file'])

    # The model classes come from the training module; imported at call time.
    from compress_data_unified import SimpleAE, AE

    # All four entries are read up front, so a missing key fails before any
    # model is constructed.
    latent_dims, input_dim, layer_sizes, model_type = (
        info[key]
        for key in ('latent_dims', 'input_dim', 'layer_sizes', 'model_type')
    )

    if model_type != 'SimpleAE':
        model = AE(layer_sizes, use_simple=False)
    elif isinstance(layer_sizes, list) and len(layer_sizes) > 1:
        # A multi-entry layer list is built through AE in its "simple" mode.
        model = AE(layer_sizes, use_simple=True)
    else:
        model = SimpleAE(input_dim, latent_dims)

    # NOTE(review): torch.load unpickles the file, so model_dir must come
    # from a trusted source.
    state = torch.load(weights_path, map_location='cpu')
    model.load_state_dict(state)
    model.eval()

    return model, config
| |
|
def preprocess_data(data, config):
    """Rescale *data* to approximately [-1, 1] using its own global range.

    The minimum and maximum are taken over every element of *data*, ignoring
    NaNs, and each value is mapped with
    ``2 * (x - min) / (max - min + eps) - 1``.
    NOTE(review): the original docstring says this mirrors the training
    preprocessing; confirm against the training code, since the range used
    here comes from the inference data itself.

    Args:
        data: Array-like supporting element-wise arithmetic (for example a
            NumPy array or a pandas DataFrame).
        config: Model configuration. Currently unused; kept so the signature
            stays stable for callers.

    Returns:
        The rescaled data. *data* itself is returned unchanged when it is
        empty or when its value range is smaller than ``eps``.
    """
    eps = 1e-8

    # np.nanmin / np.nanmax raise ValueError on zero-size input, and there is
    # nothing to rescale anyway.
    if np.size(data) == 0:
        return data

    min_val = np.nanmin(data)
    max_val = np.nanmax(data)

    # (Near-)constant data has no usable range; leave it as it is.
    if max_val - min_val < eps:
        return data

    # eps in the denominator keeps the division well-defined for tiny ranges.
    normalized = 2 * (data - min_val) / (max_val - min_val + eps) - 1
    return normalized
| |
|
def run_inference(model_dir, input_data_path, output_path=None):
    """Encode and reconstruct the samples of a CSV file with a trained model.

    Loads the model from *model_dir*, reads *input_data_path* (first column
    used as the row index), preprocesses the values, runs them through
    ``model.encode`` and ``model.decode`` without gradient tracking, and
    writes both results as CSV files.

    Args:
        model_dir: Directory containing the trained model and its config.
        input_data_path: Path to the input CSV file.
        output_path: Directory for the result files; ``None`` selects
            ``'inference_results'``. Created if it does not exist.

    Returns:
        A ``(latent_df, reconstructed_df)`` tuple of DataFrames indexed like
        the input. Latent columns are named ``latent_1`` .. ``latent_N`` with
        N taken from ``config['model_info']['latent_dims']``; reconstructed
        columns reuse the input column labels.
    """
    model, config = load_model_and_config(model_dir)

    samples = pd.read_csv(input_data_path, index_col=0)
    scaled = preprocess_data(samples, config)
    inputs = torch.FloatTensor(scaled.values)

    # Pure forward passes; gradients are not needed.
    with torch.no_grad():
        latent = model.encode(inputs)
        reconstructed = model.decode(latent)

    latent_values = latent.numpy()
    latent_names = [
        f'latent_{k + 1}'
        for k in range(config['model_info']['latent_dims'])
    ]
    latent_df = pd.DataFrame(
        latent_values, index=samples.index, columns=latent_names
    )
    reconstructed_df = pd.DataFrame(
        reconstructed.numpy(), index=samples.index, columns=samples.columns
    )

    target_dir = 'inference_results' if output_path is None else output_path
    os.makedirs(target_dir, exist_ok=True)

    latent_csv = os.path.join(target_dir, 'latent_representation.csv')
    reconstructed_csv = os.path.join(target_dir, 'reconstructed_data.csv')
    latent_df.to_csv(latent_csv)
    reconstructed_df.to_csv(reconstructed_csv)

    print("Inference completed:")
    print(f" Latent representation saved: {latent_csv}")
    print(f" Reconstructed data saved: {reconstructed_csv}")

    return latent_df, reconstructed_df
| |
|
if __name__ == "__main__":
    # Command-line entry point: parse the three path options, run inference,
    # and report the shapes of the resulting tables.
    cli = argparse.ArgumentParser(description='Run inference with trained autoencoder')

    # Every option is a string; only the remaining keyword arguments differ.
    option_table = (
        ('--model_dir', {'required': True,
                         'help': 'Directory containing trained model and config'}),
        ('--input_data', {'required': True,
                          'help': 'Path to input data CSV file'}),
        ('--output_dir', {'default': 'inference_results',
                          'help': 'Output directory for results'}),
    )
    for flag, extra in option_table:
        cli.add_argument(flag, type=str, **extra)

    options = cli.parse_args()

    latent_table, reconstructed_table = run_inference(
        options.model_dir, options.input_data, options.output_dir
    )
    print(f"Latent dimensions: {latent_table.shape}")
    print(f"Reconstructed dimensions: {reconstructed_table.shape}")
| |
|