"""Train a conditional WGAN-GP that generates latent vectors conditioned on
ticker ID. Saves the generator/discriminator weights, the latent scaler
parameters, per-epoch losses, and the run configuration."""

import os
import sys
import json

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

# Make the project root importable so `src.logger` resolves when the script is
# run directly; fall back to stdlib logging if the project logger is unavailable.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
try:
    from src.logger import get_logger
    logger = get_logger(__name__)
except Exception:
    import logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

class LatentTickerDataset(Dataset):
    """Pairs of (latent vector, ticker ID) loaded from row-aligned .npy files."""

    def __init__(self, latent_path, ticker_path):
        self.latents = np.load(latent_path)
        self.tickers = np.load(ticker_path)
        assert self.latents.shape[0] == self.tickers.shape[0], \
            "Latents and tickers length mismatch"

    def __len__(self):
        return self.latents.shape[0]

    def __getitem__(self, idx):
        x = self.latents[idx].astype(np.float32)
        y = int(self.tickers[idx])
        return x, y

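# Usage sketch (hypothetical paths, assuming the two arrays are row-aligned):
#   ds = LatentTickerDataset("latent_vectors.npy", "sequence_tickers.npy")
#   x, y = ds[0]   # x: float32 latent vector, y: int ticker ID
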
class ConditionalGenerator(nn.Module):
    """MLP generator: concatenates noise with a learned ticker embedding."""

    def __init__(self, noise_dim, embed_dim, num_tickers, latent_dim, hidden_dim=128):
        super().__init__()
        self.ticker_emb = nn.Embedding(num_tickers, embed_dim)
        input_dim = noise_dim + embed_dim
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, latent_dim),
        )

    def forward(self, z, ticker_ids):
        emb = self.ticker_emb(ticker_ids)
        x = torch.cat([z, emb], dim=1)
        return self.net(x)

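# Shape sketch (illustrative values, not part of the training run): for a batch
# of size B, G maps z (B, noise_dim) plus ticker_ids (B,) to fakes (B, latent_dim):
#   g = ConditionalGenerator(noise_dim=64, embed_dim=16, num_tickers=10, latent_dim=32)
#   out = g(torch.randn(8, 64), torch.randint(0, 10, (8,)))   # shape: (8, 32)
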
class ConditionalDiscriminator(nn.Module):
    """MLP critic: scores a latent vector jointly with its ticker embedding."""

    def __init__(self, latent_dim, embed_dim, num_tickers, hidden_dim=128):
        super().__init__()
        self.ticker_emb = nn.Embedding(num_tickers, embed_dim)
        input_dim = latent_dim + embed_dim
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, 1),  # unbounded critic score (no sigmoid in WGAN)
        )

    def forward(self, x, ticker_ids):
        emb = self.ticker_emb(ticker_ids)
        x_cat = torch.cat([x, emb], dim=1)
        return self.net(x_cat)

def gradient_penalty_cond(D, real, fake, ticker_ids, device):
    """Compute the WGAN-GP gradient penalty for a conditional critic D(x, ticker_ids)."""
    batch_size = real.size(0)
    # One interpolation coefficient per sample, broadcast across feature dims.
    alpha = torch.rand(batch_size, 1, device=device)
    interpolates = (alpha * real + (1 - alpha) * fake).requires_grad_(True)
    d_interpolates = D(interpolates, ticker_ids)
    grad_outputs = torch.ones_like(d_interpolates)
    gradients = torch.autograd.grad(
        outputs=d_interpolates,
        inputs=interpolates,
        grad_outputs=grad_outputs,
        create_graph=True,
        retain_graph=True,
        only_inputs=True,
    )[0]
    gradients = gradients.view(batch_size, -1)
    gp = ((gradients.norm(2, dim=1) - 1) ** 2).mean()
    return gp

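# The penalty above is the two-sided WGAN-GP term of Gulrajani et al. (2017):
#   GP = E[(||grad_x D(x_hat, c)||_2 - 1)^2],  x_hat = a*x_real + (1 - a)*x_fake,  a ~ U(0, 1)
# which pushes the critic toward 1-Lipschitz behaviour along real-fake interpolations.
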
if __name__ == "__main__":
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    latent_path = os.path.join(base_dir, "data", "processed", "latent_vectors.npy")
    ticker_path = os.path.join(base_dir, "data", "processed", "sequence_tickers.npy")
    models_dir = os.path.join(base_dir, "models")
    resources_dir = os.path.join(base_dir, "resources")
    os.makedirs(models_dir, exist_ok=True)
    os.makedirs(resources_dir, exist_ok=True)

    logger.info("Loading latent vectors from: %s", latent_path)
    latent_vectors = np.load(latent_path)
    logger.info("Loaded latent vectors shape: %s", latent_vectors.shape)

    logger.info("Loading sequence ticker IDs from: %s", ticker_path)
    sequence_tickers = np.load(ticker_path)
    logger.info("Loaded ticker IDs shape: %s", sequence_tickers.shape)

    # Standardize the latents and persist the scaler parameters so that
    # generated samples can later be mapped back to the original latent space.
    scaler = StandardScaler()
    latent_scaled = scaler.fit_transform(latent_vectors)
    scaler_save = {"mean": scaler.mean_.tolist(), "scale": scaler.scale_.tolist()}
    np.save(os.path.join(resources_dir, "latent_scaler.npy"), scaler_save)
    logger.info("Saved latent scaler params to resources.")

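    # Read-back sketch: np.save stores the dict as a 0-d object array, so
    # restoring it later requires allow_pickle, e.g.:
    #   params = np.load("latent_scaler.npy", allow_pickle=True).item()
    #   mean, scale = np.array(params["mean"]), np.array(params["scale"])
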
    dataset = LatentTickerDataset(latent_path, ticker_path)
    # Swap in the standardized latents; the ticker IDs are unchanged.
    dataset.latents = latent_scaled
    batch_size = 256
    # Shuffle so each critic update sees a fresh mix of tickers per batch.
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True, num_workers=2)

    # Hyperparameters
    noise_dim = 64
    hidden_dim = 128
    n_epochs = 300
    lr = 1e-4
    lambda_gp = 10
    n_critic = 5
    embed_dim = 16

    latent_dim = latent_scaled.shape[1]
    num_tickers = int(sequence_tickers.max()) + 1

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    G = ConditionalGenerator(noise_dim=noise_dim, embed_dim=embed_dim,
                             num_tickers=num_tickers, latent_dim=latent_dim,
                             hidden_dim=hidden_dim).to(device)
    D = ConditionalDiscriminator(latent_dim=latent_dim, embed_dim=embed_dim,
                                 num_tickers=num_tickers, hidden_dim=hidden_dim).to(device)

    # A lower beta1 than the Adam default is a common stabilizing choice
    # for WGAN-GP training.
    opt_G = optim.Adam(G.parameters(), lr=lr, betas=(0.5, 0.9))
    opt_D = optim.Adam(D.parameters(), lr=lr, betas=(0.5, 0.9))

    losses = {"epoch": [], "D_loss": [], "G_loss": []}

    logger.info("Starting Conditional WGAN-GP training...")
    for epoch in range(n_epochs):
        D_losses_epoch = []
        G_losses_epoch = []

        for real_batch, tickers_batch in tqdm(loader, desc=f"Epoch {epoch+1}/{n_epochs}", leave=False):
            real = real_batch.to(device)
            tickers = tickers_batch.to(device).long()
            bsize = real.size(0)

            # Critic: n_critic updates per generator step, maximizing
            # E[D(real)] - E[D(fake)] minus the gradient penalty.
            for _ in range(n_critic):
                z = torch.randn(bsize, noise_dim, device=device)
                fake = G(z, tickers)

                d_real = D(real, tickers)
                d_fake = D(fake.detach(), tickers)

                gp = gradient_penalty_cond(D, real, fake.detach(), tickers, device)
                d_loss = -(d_real.mean() - d_fake.mean()) + lambda_gp * gp

                opt_D.zero_grad()
                d_loss.backward()
                opt_D.step()

            # Generator: maximize the critic score on fresh fakes.
            z = torch.randn(bsize, noise_dim, device=device)
            fake = G(z, tickers)
            g_loss = -D(fake, tickers).mean()

            opt_G.zero_grad()
            g_loss.backward()
            opt_G.step()

            D_losses_epoch.append(d_loss.item())
            G_losses_epoch.append(g_loss.item())

        mean_D = float(np.mean(D_losses_epoch)) if len(D_losses_epoch) else 0.0
        mean_G = float(np.mean(G_losses_epoch)) if len(G_losses_epoch) else 0.0

        losses["epoch"].append(epoch + 1)
        losses["D_loss"].append(mean_D)
        losses["G_loss"].append(mean_G)

        logger.info("[%d/%d] D_loss=%.4f, G_loss=%.4f", epoch + 1, n_epochs, mean_D, mean_G)

    losses_df = pd.DataFrame(losses)
    losses_csv_path = os.path.join(resources_dir, "latent_gan_losses.csv")
    losses_df.to_csv(losses_csv_path, index=False)
    logger.info("Saved training losses to %s", losses_csv_path)

    torch.save(G.state_dict(), os.path.join(models_dir, "latent_gan_generator_conditional.pth"))
    torch.save(D.state_dict(), os.path.join(models_dir, "latent_gan_discriminator_conditional.pth"))
    logger.info("Saved GAN models to models/")

    with open(os.path.join(resources_dir, "gan_config.json"), "w") as f:
        json.dump({
            "model": "WGAN-GP-conditional",
            "noise_dim": noise_dim,
            "latent_dim": latent_dim,
            "hidden_dim": hidden_dim,
            "epochs": n_epochs,
            "batch_size": batch_size,
            "lr": lr,
            "lambda_gp": lambda_gp,
            "n_critic": n_critic,
            "embed_dim": embed_dim,
            "num_tickers": num_tickers
        }, f, indent=4)

    logger.info("Saved GAN config to resources/gan_config.json")
    logger.info("Training completed successfully.")
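    # Sampling sketch (hypothetical post-training use, not executed here): to
    # draw synthetic latents for one ticker id `t`, one could do
    #   z = torch.randn(n_samples, noise_dim, device=device)
    #   ids = torch.full((n_samples,), t, dtype=torch.long, device=device)
    #   fake = G(z, ids).detach().cpu().numpy()
    #   latents = fake * scaler.scale_ + scaler.mean_   # undo the StandardScaler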