Upload folder using huggingface_hub
- README.md +27 -3
- apogee/model.py +281 -0
- apogee/tokenizer.py +41 -0
- assets/candles_binance.BTCUSDT_1m.png +0 -0
- assets/candles_binance.BTCUSDT_8h.png +0 -0
- assets/candles_binance.DOGEUSDT_2h.png +0 -0
- ckpt.pt +3 -0
- handler.py +143 -0
README.md
CHANGED
@@ -1,3 +1,27 @@
----
-license: apache-2.0
----
+---
+license: apache-2.0
+tags:
+- crypto
+- deep-learning
+- time-series
+- forecasting
+- transformer
+- state-space-models
+- open-source
+- scaling-laws
+library_name: transformers
+---
+
+<div align="center">
+<a href="https://www.duonlabs.com" target="_blank">
+<img src="https://www.duonlabs.com/theme/images/duon_white.png" width="30%" alt="Duon Labs Logo" />
+</a>
+</div>
+<h1 align="center" style="font-size: 3rem;">Apogée: Crypto Market Candlestick Dataset</h1>
+<hr>
+
+## Overview
+
+Most traders believe crypto is random, but deep learning scaling laws suggest otherwise. Apogée is an open-source research initiative exploring the **scaling laws of crypto market forecasting**: while financial markets are often assumed to be unpredictable, modern deep learning suggests that increasing data and compute could uncover measurable predictability.
+Our goal is to **quantify how many bits of future price movement can be inferred** from historical candlestick data.
+[More information on Apogée](https://www.duonlabs.com/apogee)
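To make the README's "bits of future price movement" framing concrete: the model added below is a byte-level next-token predictor, so its average cross-entropy on held-out candles, converted from nats to bits, measures how many of the log2(257) ≈ 8 bits per token remain unpredictable. A minimal sketch, not part of the released files; `logits` and `targets` are placeholders for the model's predictions and the true byte-tokens:

```python
import math
import torch
import torch.nn.functional as F

def bits_per_token(logits: torch.Tensor, targets: torch.Tensor) -> float:
    """Average next-token cross-entropy, converted from nats to bits."""
    nats = F.cross_entropy(logits.reshape(-1, logits.size(-1)), targets.reshape(-1))
    return nats.item() / math.log(2)

# Placeholder tensors: (batch, time, vocab) logits and (batch, time) targets.
logits, targets = torch.randn(1, 100, 257), torch.randint(0, 257, (1, 100))
print(bits_per_token(logits, targets))  # ~8 bits for an uninformed model; lower means extracted predictability
```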
apogee/model.py
ADDED
@@ -0,0 +1,281 @@
"""
Full definition of a GPT Language Model, all of it in this single file.
References:
1) the official GPT-2 TensorFlow implementation released by OpenAI:
https://github.com/openai/gpt-2/blob/master/src/model.py
2) huggingface/transformers PyTorch implementation:
https://github.com/huggingface/transformers/blob/main/src/transformers/models/gpt2/modeling_gpt2.py
"""
import json
import math
import inspect
import torch

from pathlib import Path
from typing import Optional, Union
from dataclasses import dataclass
from torch.nn import functional as F

class LayerNorm(torch.nn.Module):
    """ LayerNorm but with an optional bias. PyTorch doesn't support simply bias=False """

    def __init__(self, ndim, bias):
        super().__init__()
        self.weight = torch.nn.Parameter(torch.ones(ndim))
        self.bias = torch.nn.Parameter(torch.zeros(ndim)) if bias else None

    def forward(self, input):
        return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)

class CausalSelfAttention(torch.nn.Module):

    def __init__(self, config):
        super().__init__()
        assert config.n_embd % config.n_head == 0
        # key, query, value projections for all heads, but in a batch
        self.c_attn = torch.nn.Linear(config.n_embd, 3 * config.n_embd, bias=config.bias)
        # output projection
        self.c_proj = torch.nn.Linear(config.n_embd, config.n_embd, bias=config.bias)
        # regularization
        self.attn_dropout = torch.nn.Dropout(config.dropout)
        self.resid_dropout = torch.nn.Dropout(config.dropout)
        self.n_head = config.n_head
        self.n_embd = config.n_embd
        self.dropout = config.dropout
        # flash attention makes GPU go brrrrr but support is only in PyTorch >= 2.0
        self.flash = hasattr(torch.nn.functional, 'scaled_dot_product_attention')
        if not self.flash:
            print("WARNING: using slow attention. Flash Attention requires PyTorch >= 2.0")
            # causal mask to ensure that attention is only applied to the left in the input sequence
            self.register_buffer("bias", torch.tril(torch.ones(config.block_size, config.block_size))
                                        .view(1, 1, config.block_size, config.block_size))

    def forward(self, x):
        B, T, C = x.size() # batch size, sequence length, embedding dimensionality (n_embd)

        # calculate query, key, values for all heads in batch and move head forward to be the batch dim
        q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
        k = k.view(B, T, self.n_head, C // self.n_head).transpose(1, 2) # (B, nh, T, hs)
        q = q.view(B, T, self.n_head, C // self.n_head).transpose(1, 2) # (B, nh, T, hs)
        v = v.view(B, T, self.n_head, C // self.n_head).transpose(1, 2) # (B, nh, T, hs)

        # causal self-attention; Self-attend: (B, nh, T, hs) x (B, nh, hs, T) -> (B, nh, T, T)
        if self.flash:
            # efficient attention using Flash Attention CUDA kernels
            y = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=None, dropout_p=self.dropout if self.training else 0, is_causal=True)
        else:
            # manual implementation of attention
            att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
            att = att.masked_fill(self.bias[:,:,:T,:T] == 0, float('-inf'))
            att = F.softmax(att, dim=-1)
            att = self.attn_dropout(att)
            y = att @ v # (B, nh, T, T) x (B, nh, T, hs) -> (B, nh, T, hs)
        y = y.transpose(1, 2).contiguous().view(B, T, C) # re-assemble all head outputs side by side

        # output projection
        y = self.resid_dropout(self.c_proj(y))
        return y

class MLP(torch.nn.Module):

    def __init__(self, config):
        super().__init__()
        self.c_fc = torch.nn.Linear(config.n_embd, 4 * config.n_embd, bias=config.bias)
        self.gelu = torch.nn.GELU()
        self.c_proj = torch.nn.Linear(4 * config.n_embd, config.n_embd, bias=config.bias)
        self.dropout = torch.nn.Dropout(config.dropout)

    def forward(self, x):
        x = self.c_fc(x)
        x = self.gelu(x)
        x = self.c_proj(x)
        x = self.dropout(x)
        return x

class Block(torch.nn.Module):

    def __init__(self, config):
        super().__init__()
        self.ln_1 = LayerNorm(config.n_embd, bias=config.bias)
        self.attn = CausalSelfAttention(config)
        self.ln_2 = LayerNorm(config.n_embd, bias=config.bias)
        self.mlp = MLP(config)

    def forward(self, x):
        x = x + self.attn(self.ln_1(x))
        x = x + self.mlp(self.ln_2(x))
        return x

@dataclass
class ModelConfig:
    block_size: int
    vocab_size: int
    n_layer: int = 3
    n_head: Optional[int] = None
    head_dim: Optional[int] = None
    n_embd: int = 384
    dropout: float = 0.0
    mup_base_dim: int = 128
    bias: bool = False

class GPT(torch.nn.Module):

    def __init__(self, config):
        super().__init__()
        assert config.vocab_size is not None
        assert config.block_size is not None
        assert config.n_head is not None or config.head_dim is not None
        self.config = config
        if config.n_head is None:
            config.n_head = config.n_embd // config.head_dim
        if config.head_dim is None:
            config.head_dim = config.n_embd // config.n_head

        self.transformer = torch.nn.ModuleDict(dict(
            wte = torch.nn.Embedding(config.vocab_size, config.n_embd),
            # wpe = torch.nn.Embedding(config.block_size, config.n_embd),
            # factored positional embedding: wbe = byte index within a float32 (4), wce = field index within a candle (5: OHLCV), wpe = candle index
            wbe = torch.nn.Embedding(4, config.n_embd),
            wce = torch.nn.Embedding(5, config.n_embd),
            wpe = torch.nn.Embedding(config.block_size // 20, config.n_embd),
            drop = torch.nn.Dropout(config.dropout),
            h = torch.nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
            ln_f = LayerNorm(config.n_embd, bias=config.bias),
        ))
        self.lm_head = torch.nn.Linear(config.n_embd, config.vocab_size, bias=False)
        # with weight tying when using torch.compile() some warnings get generated:
        # "UserWarning: functional_call was passed multiple values for tied weights.
        # This behavior is deprecated and will be an error in future versions"
        # not 100% sure what this is, so far seems to be harmless. TODO investigate
        self.transformer.wte.weight = self.lm_head.weight # https://paperswithcode.com/method/weight-tying

        # init all weights
        self.apply(self._init_weights)
        # apply special scaled init to the residual projections, per GPT-2 paper
        for pn, p in self.named_parameters():
            if pn.endswith('c_proj.weight'):
                torch.nn.init.normal_(p, mean=0.0, std=0.02/math.sqrt(2 * config.n_layer))

        # report number of parameters
        print("number of parameters: %.2fM" % (self.get_num_params()/1e6,))

    def get_num_params(self, non_embedding=True):
        """
        Return the number of parameters in the model.
        For non-embedding count (default), the position embeddings get subtracted.
        The token embeddings would too, except due to the parameter sharing these
        params are actually used as weights in the final layer, so we include them.
        """
        n_params = sum(p.numel() for p in self.parameters())
        if non_embedding:
            n_params -= self.transformer.wpe.weight.numel()
        return n_params

    def _init_weights(self, module):
        if isinstance(module, torch.nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, torch.nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

    def forward(self, idx):
        device = idx.device
        b, t = idx.size()
        assert t <= self.config.block_size, f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
        pos = torch.arange(0, t, dtype=torch.long, device=device) # shape (t)
        # forward the GPT model itself
        tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd)
        # pos_emb = self.transformer.wpe(pos) # position embeddings of shape (t, n_embd)
        pos_emb = self.transformer.wbe(pos % 4) + self.transformer.wce(pos // 4 % 5) + self.transformer.wpe(pos // 20)
        x = self.transformer.drop(tok_emb + pos_emb)
        for block in self.transformer.h:
            x = block(x)
        x = self.transformer.ln_f(x)
        logits = self.lm_head(x)
        return logits

    def crop_block_size(self, block_size):
        # model surgery to decrease the block size if necessary
        # e.g. we may load the GPT2 pretrained model checkpoint (block size 1024)
        # but want to use a smaller block size for some smaller, simpler model
        assert block_size <= self.config.block_size
        self.config.block_size = block_size
        self.transformer.wpe.weight = torch.nn.Parameter(self.transformer.wpe.weight[:block_size])
        for block in self.transformer.h:
            if hasattr(block.attn, 'bias'):
                block.attn.bias = block.attn.bias[:,:,:block_size,:block_size]

    def configure_optimizers(self, weight_decay, learning_rate, betas, device_type):
        # start with all of the candidate parameters
        param_dict = {pn: p for pn, p in self.named_parameters()}
        # filter out those that do not require grad
        param_dict = {pn: p for pn, p in param_dict.items() if p.requires_grad}
        # create optim groups. Any parameter that is 2D will be weight decayed, otherwise no.
        # i.e. all weight tensors in matmuls + embeddings decay, all biases and layernorms don't.
        decay_params = [p for n, p in param_dict.items() if p.dim() >= 2]
        nodecay_params = [p for n, p in param_dict.items() if p.dim() < 2]
        optim_groups = [
            {'params': decay_params, 'weight_decay': weight_decay},
            {'params': nodecay_params, 'weight_decay': 0.0}
        ]
        num_decay_params = sum(p.numel() for p in decay_params)
        num_nodecay_params = sum(p.numel() for p in nodecay_params)
        print(f"num decayed parameter tensors: {len(decay_params)}, with {num_decay_params:,} parameters")
        print(f"num non-decayed parameter tensors: {len(nodecay_params)}, with {num_nodecay_params:,} parameters")
        # Create AdamW optimizer and use the fused version if it is available
        fused_available = 'fused' in inspect.signature(torch.optim.AdamW).parameters
        use_fused = fused_available and device_type.startswith('cuda')
        extra_args = dict(fused=True) if use_fused else dict()
        optimizer = torch.optim.AdamW(optim_groups, lr=learning_rate, betas=betas, **extra_args)
        print(f"using fused AdamW: {use_fused}")

        return optimizer

    def estimate_mfu(self, fwdbwd_per_iter, dt):
        """ estimate model flops utilization (MFU) in units of A100 bfloat16 peak FLOPS """
        # first estimate the number of flops we do per iteration.
        # see PaLM paper Appendix B as ref: https://arxiv.org/abs/2204.02311
        N = self.get_num_params()
        cfg = self.config
        L, H, Q, T = cfg.n_layer, cfg.n_head, cfg.n_embd//cfg.n_head, cfg.block_size
        flops_per_token = 6*N + 12*L*H*Q*T
        flops_per_fwdbwd = flops_per_token * T
        flops_per_iter = flops_per_fwdbwd * fwdbwd_per_iter
        # express our flops throughput as ratio of A100 bfloat16 peak flops
        flops_achieved = flops_per_iter * (1.0/dt) # per second
        flops_promised = 312e12 # A100 GPU bfloat16 peak flops is 312 TFLOPS
        mfu = flops_achieved / flops_promised
        return mfu

    @torch.no_grad()
    def generate(self, idx, max_new_tokens, temperature=1.0, top_k=None):
        """
        Take a conditioning sequence of indices idx (LongTensor of shape (b,t)) and complete
        the sequence max_new_tokens times, feeding the predictions back into the model each time.
        Most likely you'll want to make sure to be in model.eval() mode of operation for this.
        """
        for _ in range(max_new_tokens):
            # if the sequence context is growing too long we must crop it at block_size
            idx_cond = idx if idx.size(1) <= self.config.block_size else idx[:, -self.config.block_size:]
            # forward the model to get the logits for the index in the sequence
            logits = self(idx_cond)
            # pluck the logits at the final step and scale by desired temperature
            logits = logits[:, -1, :] / temperature
            # optionally crop the logits to only the top k options
            if top_k is not None:
                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
                logits[logits < v[:, [-1]]] = -float('Inf')
            # apply softmax to convert logits to (normalized) probabilities
            probs = F.softmax(logits, dim=-1)
            # sample from the distribution
            idx_next = torch.multinomial(probs, num_samples=1)
            # append sampled index to the running sequence and continue
            idx = torch.cat((idx, idx_next), dim=1)

        return idx

    @staticmethod
    def from_config_file(config_file: Union[str, Path]):
        with open(config_file, 'r') as f:
            config_data = json.load(f)
        return GPT(ModelConfig(**config_data))
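A quick smoke test of the model above. The hyperparameters here are hypothetical placeholders chosen only so the tensors line up (block_size must be a multiple of 20 for the factored positional embedding); the released checkpoint's actual configuration is stored in ckpt.pt and restored by handler.py below:

```python
import torch
from apogee.model import GPT, ModelConfig

# Hypothetical hyperparameters, not the released checkpoint's values.
config = ModelConfig(block_size=400, vocab_size=257, n_layer=2, n_head=4, n_embd=128)
model = GPT(config).eval()

prompt = torch.randint(0, 257, (1, 40))          # 40 byte-tokens = 2 candles of 20 tokens each
out = model.generate(prompt, max_new_tokens=20)  # autoregressively sample one more candle's worth of tokens
print(out.shape)                                 # torch.Size([1, 60])
```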
apogee/tokenizer.py
ADDED
@@ -0,0 +1,41 @@
import sys
import torch

import numpy as np

from typing import Union

class Tokenizer:
    @property
    def vocabulary_size(self) -> int:
        """Return the size of the vocabulary"""
        return 257

    @property
    def tokens_per_candle(self) -> int:
        """Return the number of tokens per candle"""
        return 4 * 5

    def encode(self, candles: Union[np.ndarray, torch.Tensor]) -> torch.Tensor:
        """Tokenize candles into tokens."""
        if isinstance(candles, np.ndarray): # Wrap into a tensor
            candles = torch.tensor(candles)
        candles = (candles.view(torch.int32) << 1).view(torch.float32) # Erase the sign bit to fit the exponent into the first byte
        if sys.byteorder == 'little': # On little-endian systems, we need to byteswap the data so that the msb comes first
            candles.untyped_storage().byteswap(torch.float32)
        buffer = candles.view(torch.uint8) # Interpret the data as bytes ("tokenization" step)
        buffer = buffer.view(-1).to(torch.uint16) # Flatten the data and convert to uint16 because otherwise <BOS> would overflow
        buffer = torch.cat([torch.tensor([256], dtype=torch.uint16), buffer]) # Prepend <BOS> (Begin of Series) token
        return buffer

    def decode(self, tokens: torch.Tensor) -> torch.Tensor:
        """Decode tokens into candles."""
        tokens = tokens.long()
        candles_tokens = tokens[..., 1:] # Remove <BOS> token
        candles_tokens = candles_tokens.to(torch.uint8).view(*tokens.shape[:-1], -1, self.tokens_per_candle) # Convert back to uint8 and reshape
        candles_tokens = candles_tokens.view(torch.float32) # Interpret the data as floats
        if sys.byteorder == 'little': # On little-endian systems, we need to byteswap the data back
            # candles_tokens.untyped_storage().byteswap(torch.float32) # <-- This segfaults for some reason
            candles_tokens = candles_tokens.view(torch.uint8).view(*candles_tokens.shape, 4).flip(-1).view(torch.float32).squeeze(-1) # Workaround
        candles_tokens = -((candles_tokens.view(torch.int32) >> 1) | (1 << 31)).view(torch.float32) # Restore the sign bit
        return candles_tokens
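A small round-trip sketch of the tokenizer above (the candle values are random placeholders): each float32 value becomes 4 byte-tokens, so one OHLCV candle is 20 tokens, with a single `<BOS>` token (id 256) prepended to the series; decode should invert encode for the positive values that prices and volumes are.

```python
import numpy as np
import torch
from apogee.tokenizer import Tokenizer

tok = Tokenizer()
candles = np.random.rand(8, 5).astype(np.float32)  # 8 candles x (open, high, low, close, volume)

tokens = tok.encode(candles)
print(tokens.shape, tokens[0].item())  # torch.Size([161]) 256 -> 1 <BOS> + 8 * 20 byte-tokens

decoded = tok.decode(tokens.unsqueeze(0))  # add a batch dimension, as handler.py does
print(decoded.shape)                       # torch.Size([1, 8, 5])
```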
assets/candles_binance.BTCUSDT_1m.png
ADDED

assets/candles_binance.BTCUSDT_8h.png
ADDED

assets/candles_binance.DOGEUSDT_2h.png
ADDED

ckpt.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cb44264a9b2d3215978459a20f6ac5b3ce56a6cf89ffb3b863ea1e7770c7563c
size 28918050

handler.py
ADDED
@@ -0,0 +1,143 @@
from contextlib import nullcontext
import time
import torch
from apogee.tokenizer import Tokenizer
from apogee.model import GPT, ModelConfig

from typing import Any, Dict, Optional, Union
from pathlib import Path

torch.backends.cuda.matmul.allow_tf32 = True # allow tf32 on matmul
torch.backends.cudnn.allow_tf32 = True # allow tf32 on cudnn

class ApogeeHandler:
    """
    Handler class.
    """

    def __init__(self, base_path: Optional[Union[str, Path]] = None, device: Optional[str] = None):
        if base_path is None:
            base_path = Path(__file__).parent
        self.base_path = Path(base_path)
        # Get the device
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = device
        print(f"Handler spawned on device {self.device} 🚀")
        ckpt_path = self.base_path / "ckpt.pt"
        print(f"Loading model from {ckpt_path} 🤖")
        checkpoint = torch.load(ckpt_path, map_location=device)
        self.config = ModelConfig(**checkpoint["model_config"])
        self.tokenizer = Tokenizer()
        self.model = GPT(self.config)
        state_dict = checkpoint['model']
        unwanted_prefix = '_orig_mod.'
        for k in list(state_dict.keys()):
            if k.startswith(unwanted_prefix):
                state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
        self.model.load_state_dict(state_dict)
        self.model.eval()
        self.model.to(self.device)
        self.model = torch.compile(self.model)
        dtype = 'bfloat16' if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else 'float16' # 'float32' or 'bfloat16' or 'float16'
        ptdtype = {'float32': torch.float32, 'bfloat16': torch.bfloat16, 'float16': torch.float16}[dtype]
        self.ctx = nullcontext() if device == 'cpu' else torch.amp.autocast(device_type=device, dtype=ptdtype)
        print("Warming up hardware 🔥")
        with torch.no_grad(), self.ctx:
            self.model(torch.randint(0, self.tokenizer.vocabulary_size, (1, self.config.block_size), device=self.device))
        print("Model ready! ✅")
        # Precompute useful values
        self.max_candles = self.config.block_size // self.tokenizer.tokens_per_candle

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Args:
            data (Dict[str, Any]):
                inputs: Dict[str, List[float]] with keys:
                    timestamps: Timestamps of the time series
                    open: Open prices
                    high: High prices
                    low: Low prices
                    close: Close prices
                    volume: Volumes
                steps: int = 4 | Number of sampling steps
                n_scenarios: int = 32 | Number of scenarios to generate
                seed: Optional[int] = None | Seed for the random number generator
        Return:
            Dict[str, Any] Generated scenarios with keys:
                timestamps: Timestamps of the time series
                open: Open prices
                high: High prices
                low: Low prices
                close: Close prices
                volume: Volumes
        """
        t_start = time.time() # Start the timer
        # Unpack input data
        inputs = data.pop("inputs", data)
        # Validate the inputs
        assert "timestamps" in inputs and "open" in inputs and "high" in inputs and "low" in inputs and "close" in inputs and "volume" in inputs, "Required keys: timestamps, open, high, low, close, volume"
        assert isinstance(inputs["timestamps"], list) and isinstance(inputs["open"], list) and isinstance(inputs["high"], list) and isinstance(inputs["low"], list) and isinstance(inputs["close"], list) and isinstance(inputs["volume"], list), "Inputs must be lists"
        assert len(inputs["timestamps"]) == len(inputs["open"]) == len(inputs["high"]) == len(inputs["low"]) == len(inputs["close"]) == len(inputs["volume"]), "Inputs must have the same length"
        timestamps = torch.tensor(inputs["timestamps"])
        samples = torch.tensor([inputs["open"], inputs["high"], inputs["low"], inputs["close"], inputs["volume"]], dtype=torch.float32).T.contiguous()
        steps = data.pop("steps", 4)
        n_scenarios = data.pop("n_scenarios", 32)
        seed = data.pop("seed", None)
        # Validate the params
        assert isinstance(steps, int) and steps > 0, "steps must be a positive integer"
        assert isinstance(n_scenarios, int) and n_scenarios > 0, "n_scenarios must be a positive integer"
        if seed is not None:
            assert isinstance(seed, int), "seed must be an integer"
            torch.manual_seed(seed)
            torch.cuda.manual_seed(seed)
        # Generate scenarios
        samples = samples[-self.max_candles + steps:] # Keep only the last candles that fit in the model's context
        tokens = self.tokenizer.encode(samples) # Encode the samples into tokens
        tokens = tokens.to(self.device).unsqueeze(0).long() # Add a batch dimension
        with torch.no_grad(), self.ctx:
            for _ in range(steps * self.tokenizer.tokens_per_candle):
                assert tokens.shape[1] <= self.config.block_size, "Too many tokens in the sequence"
                logits = self.model(tokens) # forward the model to get the logits for the index in the sequence
                logits = logits[:, -1, :] # pluck the logits at the final step
                # apply softmax to convert logits to (normalized) probabilities
                probs = torch.nn.functional.softmax(logits, dim=-1)
                # sample from the distribution
                if probs.shape[0] != n_scenarios:
                    next_tokens = torch.multinomial(probs, num_samples=n_scenarios, replacement=True).T
                    tokens = tokens.expand(n_scenarios, -1)
                else:
                    next_tokens = torch.multinomial(probs, num_samples=1)
                # append sampled index to the running sequence and continue
                tokens = torch.cat((tokens, next_tokens), dim=1)
        # Decode the tokens back into samples
        scenarios = self.tokenizer.decode(tokens)[:, -steps:]
        print(f"Generated {n_scenarios} scenarios in {time.time() - t_start:.2f} seconds ⏱")
        return {
            "timestamps": (timestamps[-1] + torch.arange(1, steps+1) * torch.median(torch.diff(timestamps)).item()).tolist(),
            "open": scenarios[:, :, 0].tolist(),
            "high": scenarios[:, :, 1].tolist(),
            "low": scenarios[:, :, 2].tolist(),
            "close": scenarios[:, :, 3].tolist(),
            "volume": scenarios[:, :, 4].tolist()
        }

if __name__ == "__main__":
    import pandas as pd
    handler = ApogeeHandler()
    test_path = Path(__file__).parents[2] / "tests" / "assets" / "BTCUSDT-1m-2019-03.csv"
    with open(test_path, "r") as f:
        data = pd.read_csv(f)
    y = handler({
        "inputs": {
            "timestamps": data[data.columns[0]].tolist(),
            "open": data[data.columns[1]].tolist(),
            "high": data[data.columns[2]].tolist(),
            "low": data[data.columns[3]].tolist(),
            "close": data[data.columns[4]].tolist(),
            "volume": data[data.columns[5]].tolist()
        },
        "steps": 4,
        "n_scenarios": 64,
        "seed": 42
    })
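Beyond the CSV-driven `__main__` above, here is a minimal sketch of the handler's request/response contract using a tiny synthetic series (placeholder values only; it assumes the repository root is on the Python path and that ckpt.pt is present so `ApogeeHandler()` can load it):

```python
import numpy as np
from handler import ApogeeHandler

# Synthetic 1-minute series with placeholder values.
n = 256
timestamps = (1700000000 + 60 * np.arange(n)).tolist()
close = (100 + np.cumsum(np.random.randn(n))).tolist()
payload = {
    "inputs": {
        "timestamps": timestamps,
        "open": close, "high": close, "low": close, "close": close,
        "volume": [1.0] * n,
    },
    "steps": 4,        # forecast 4 future candles
    "n_scenarios": 8,  # sample 8 alternative futures
    "seed": 0,
}

handler = ApogeeHandler()  # loads ckpt.pt next to handler.py
out = handler(payload)
print(len(out["timestamps"]))                   # 4 future timestamps
print(len(out["close"]), len(out["close"][0]))  # 8 scenarios x 4 candles each
```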