Theob committed on
Commit 89b4b0c · verified · 1 Parent(s): 991c919

Upload folder using huggingface_hub

README.md CHANGED
@@ -1,3 +1,27 @@
- ---
- license: apache-2.0
- ---
+ ---
+ license: apache-2.0
+ tags:
+ - crypto
+ - deep-learning
+ - time-series
+ - forecasting
+ - transformer
+ - state-space-models
+ - open-source
+ - scaling-laws
+ library_name: transformers
+ ---
+
+ <div align="center">
+   <a href="https://www.duonlabs.com" target="_blank">
+     <img src="https://www.duonlabs.com/theme/images/duon_white.png" width="30%" alt="Duon Labs Logo" />
+   </a>
+ </div>
+ <h1 align="center" style="font-size: 3rem;">Apogée: Crypto Market Candlestick Dataset</h1>
+ <hr>
+
+ ## Overview
+
+ Most traders believe crypto is random, but deep learning scaling laws suggest otherwise. Apogée is an open-source research initiative exploring the **scaling laws of crypto market forecasting**. While financial markets are often assumed to be unpredictable, modern deep learning suggests that increasing data and compute could uncover measurable predictability.
+ Our goal is to **quantify how many bits of future price movement can be inferred** from historical candlestick data.
+ [More information on Apogée](https://www.duonlabs.com/apogee)
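
For reference, a minimal usage sketch of the inference handler added in this commit. It assumes the repository has been cloned locally so that `handler.py`, `ckpt.pt`, and the `apogee/` package sit side by side; the candle history below is a made-up placeholder, and the input/output format follows the docstring of `ApogeeHandler.__call__` in `handler.py`.

```python
from handler import ApogeeHandler

handler = ApogeeHandler()  # loads ckpt.pt, picks CUDA if available, warms up the model

n = 256  # made-up 1-minute candle history, oldest first
out = handler({
    "inputs": {
        "timestamps": [1735689600 + 60 * i for i in range(n)],
        "open":   [100.0 + 0.1 * i for i in range(n)],
        "high":   [100.5 + 0.1 * i for i in range(n)],
        "low":    [ 99.5 + 0.1 * i for i in range(n)],
        "close":  [100.2 + 0.1 * i for i in range(n)],
        "volume": [10.0] * n,
    },
    "steps": 4,         # sample 4 future candles
    "n_scenarios": 32,  # 32 independent trajectories
    "seed": 42,
})
# out["timestamps"] holds the 4 future timestamps;
# out["close"] is a list of 32 scenarios, each a list of 4 sampled closes.
```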
apogee/model.py ADDED
@@ -0,0 +1,281 @@
"""
Full definition of a GPT Language Model, all of it in this single file.
References:
1) the official GPT-2 TensorFlow implementation released by OpenAI:
https://github.com/openai/gpt-2/blob/master/src/model.py
2) huggingface/transformers PyTorch implementation:
https://github.com/huggingface/transformers/blob/main/src/transformers/models/gpt2/modeling_gpt2.py
"""
import json
import math
import inspect
import torch

from pathlib import Path
from typing import Optional, Union
from dataclasses import dataclass
from torch.nn import functional as F

class LayerNorm(torch.nn.Module):
    """ LayerNorm but with an optional bias. PyTorch doesn't support simply bias=False """

    def __init__(self, ndim, bias):
        super().__init__()
        self.weight = torch.nn.Parameter(torch.ones(ndim))
        self.bias = torch.nn.Parameter(torch.zeros(ndim)) if bias else None

    def forward(self, input):
        return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)

class CausalSelfAttention(torch.nn.Module):

    def __init__(self, config):
        super().__init__()
        assert config.n_embd % config.n_head == 0
        # key, query, value projections for all heads, but in a batch
        self.c_attn = torch.nn.Linear(config.n_embd, 3 * config.n_embd, bias=config.bias)
        # output projection
        self.c_proj = torch.nn.Linear(config.n_embd, config.n_embd, bias=config.bias)
        # regularization
        self.attn_dropout = torch.nn.Dropout(config.dropout)
        self.resid_dropout = torch.nn.Dropout(config.dropout)
        self.n_head = config.n_head
        self.n_embd = config.n_embd
        self.dropout = config.dropout
        # flash attention makes GPUs go brrrrr, but support is only in PyTorch >= 2.0
        self.flash = hasattr(torch.nn.functional, 'scaled_dot_product_attention')
        if not self.flash:
            print("WARNING: using slow attention. Flash Attention requires PyTorch >= 2.0")
            # causal mask to ensure that attention is only applied to the left in the input sequence
            self.register_buffer("bias", torch.tril(torch.ones(config.block_size, config.block_size))
                                        .view(1, 1, config.block_size, config.block_size))

    def forward(self, x):
        B, T, C = x.size() # batch size, sequence length, embedding dimensionality (n_embd)

        # calculate query, key, values for all heads in batch and move head forward to be the batch dim
        q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
        k = k.view(B, T, self.n_head, C // self.n_head).transpose(1, 2) # (B, nh, T, hs)
        q = q.view(B, T, self.n_head, C // self.n_head).transpose(1, 2) # (B, nh, T, hs)
        v = v.view(B, T, self.n_head, C // self.n_head).transpose(1, 2) # (B, nh, T, hs)

        # causal self-attention; Self-attend: (B, nh, T, hs) x (B, nh, hs, T) -> (B, nh, T, T)
        if self.flash:
            # efficient attention using Flash Attention CUDA kernels
            y = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=None, dropout_p=self.dropout if self.training else 0, is_causal=True)
        else:
            # manual implementation of attention
            att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(k.size(-1)))
            att = att.masked_fill(self.bias[:,:,:T,:T] == 0, float('-inf'))
            att = F.softmax(att, dim=-1)
            att = self.attn_dropout(att)
            y = att @ v # (B, nh, T, T) x (B, nh, T, hs) -> (B, nh, T, hs)
        y = y.transpose(1, 2).contiguous().view(B, T, C) # re-assemble all head outputs side by side

        # output projection
        y = self.resid_dropout(self.c_proj(y))
        return y

class MLP(torch.nn.Module):

    def __init__(self, config):
        super().__init__()
        self.c_fc = torch.nn.Linear(config.n_embd, 4 * config.n_embd, bias=config.bias)
        self.gelu = torch.nn.GELU()
        self.c_proj = torch.nn.Linear(4 * config.n_embd, config.n_embd, bias=config.bias)
        self.dropout = torch.nn.Dropout(config.dropout)

    def forward(self, x):
        x = self.c_fc(x)
        x = self.gelu(x)
        x = self.c_proj(x)
        x = self.dropout(x)
        return x

class Block(torch.nn.Module):

    def __init__(self, config):
        super().__init__()
        self.ln_1 = LayerNorm(config.n_embd, bias=config.bias)
        self.attn = CausalSelfAttention(config)
        self.ln_2 = LayerNorm(config.n_embd, bias=config.bias)
        self.mlp = MLP(config)

    def forward(self, x):
        x = x + self.attn(self.ln_1(x))
        x = x + self.mlp(self.ln_2(x))
        return x

@dataclass
class ModelConfig:
    block_size: int
    vocab_size: int
    n_layer: int = 3
    n_head: Optional[int] = None
    head_dim: Optional[int] = None
    n_embd: int = 384
    dropout: float = 0.0
    mup_base_dim: int = 128
    bias: bool = False

class GPT(torch.nn.Module):

    def __init__(self, config):
        super().__init__()
        assert config.vocab_size is not None
        assert config.block_size is not None
        assert config.n_head is not None or config.head_dim is not None
        self.config = config
        if config.n_head is None:
            config.n_head = config.n_embd // config.head_dim
        if config.head_dim is None:
            config.head_dim = config.n_embd // config.n_head

        self.transformer = torch.nn.ModuleDict(dict(
            wte = torch.nn.Embedding(config.vocab_size, config.n_embd),
            # wpe = torch.nn.Embedding(config.block_size, config.n_embd),
            wbe = torch.nn.Embedding(4, config.n_embd),  # 4 byte positions per value
            wce = torch.nn.Embedding(5, config.n_embd),  # 5 values per candle (OHLCV)
            wpe = torch.nn.Embedding(config.block_size // 20, config.n_embd),  # candle index (20 tokens per candle)
            drop = torch.nn.Dropout(config.dropout),
            h = torch.nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
            ln_f = LayerNorm(config.n_embd, bias=config.bias),
        ))
        self.lm_head = torch.nn.Linear(config.n_embd, config.vocab_size, bias=False)
        # with weight tying when using torch.compile() some warnings get generated:
        # "UserWarning: functional_call was passed multiple values for tied weights.
        # This behavior is deprecated and will be an error in future versions"
        # not 100% sure what this is, so far seems to be harmless. TODO investigate
        self.transformer.wte.weight = self.lm_head.weight # https://paperswithcode.com/method/weight-tying

        # init all weights
        self.apply(self._init_weights)
        # apply special scaled init to the residual projections, per GPT-2 paper
        for pn, p in self.named_parameters():
            if pn.endswith('c_proj.weight'):
                torch.nn.init.normal_(p, mean=0.0, std=0.02/math.sqrt(2 * config.n_layer))

        # report number of parameters
        print("number of parameters: %.2fM" % (self.get_num_params()/1e6,))

    def get_num_params(self, non_embedding=True):
        """
        Return the number of parameters in the model.
        For non-embedding count (default), the position embeddings get subtracted.
        The token embeddings would too, except due to the parameter sharing these
        params are actually used as weights in the final layer, so we include them.
        """
        n_params = sum(p.numel() for p in self.parameters())
        if non_embedding:
            n_params -= self.transformer.wpe.weight.numel()
        return n_params

    def _init_weights(self, module):
        if isinstance(module, torch.nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, torch.nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

    def forward(self, idx):
        device = idx.device
        b, t = idx.size()
        assert t <= self.config.block_size, f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
        pos = torch.arange(0, t, dtype=torch.long, device=device) # shape (t)
        # forward the GPT model itself
        tok_emb = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd)
        # pos_emb = self.transformer.wpe(pos) # position embeddings of shape (t, n_embd)
        pos_emb = self.transformer.wbe(pos % 4) + self.transformer.wce(pos // 4 % 5) + self.transformer.wpe(pos // 20)
        x = self.transformer.drop(tok_emb + pos_emb)
        for block in self.transformer.h:
            x = block(x)
        x = self.transformer.ln_f(x)
        logits = self.lm_head(x)
        return logits

    def crop_block_size(self, block_size):
        # model surgery to decrease the block size if necessary
        # e.g. we may load the GPT2 pretrained model checkpoint (block size 1024)
        # but want to use a smaller block size for some smaller, simpler model
        assert block_size <= self.config.block_size
        self.config.block_size = block_size
        self.transformer.wpe.weight = torch.nn.Parameter(self.transformer.wpe.weight[:block_size])
        for block in self.transformer.h:
            if hasattr(block.attn, 'bias'):
                block.attn.bias = block.attn.bias[:,:,:block_size,:block_size]

    def configure_optimizers(self, weight_decay, learning_rate, betas, device_type):
        # start with all of the candidate parameters
        param_dict = {pn: p for pn, p in self.named_parameters()}
        # filter out those that do not require grad
        param_dict = {pn: p for pn, p in param_dict.items() if p.requires_grad}
        # create optim groups. Any parameter that is 2D will be weight decayed, otherwise not.
        # i.e. all weight tensors in matmuls + embeddings decay, all biases and layernorms don't.
        decay_params = [p for n, p in param_dict.items() if p.dim() >= 2]
        nodecay_params = [p for n, p in param_dict.items() if p.dim() < 2]
        optim_groups = [
            {'params': decay_params, 'weight_decay': weight_decay},
            {'params': nodecay_params, 'weight_decay': 0.0}
        ]
        num_decay_params = sum(p.numel() for p in decay_params)
        num_nodecay_params = sum(p.numel() for p in nodecay_params)
        print(f"num decayed parameter tensors: {len(decay_params)}, with {num_decay_params:,} parameters")
        print(f"num non-decayed parameter tensors: {len(nodecay_params)}, with {num_nodecay_params:,} parameters")
        # Create AdamW optimizer and use the fused version if it is available
        fused_available = 'fused' in inspect.signature(torch.optim.AdamW).parameters
        use_fused = fused_available and device_type.startswith('cuda')
        extra_args = dict(fused=True) if use_fused else dict()
        optimizer = torch.optim.AdamW(optim_groups, lr=learning_rate, betas=betas, **extra_args)
        print(f"using fused AdamW: {use_fused}")

        return optimizer

    def estimate_mfu(self, fwdbwd_per_iter, dt):
        """ estimate model flops utilization (MFU) in units of A100 bfloat16 peak FLOPS """
        # first estimate the number of flops we do per iteration.
        # see PaLM paper Appendix B as ref: https://arxiv.org/abs/2204.02311
        N = self.get_num_params()
        cfg = self.config
        L, H, Q, T = cfg.n_layer, cfg.n_head, cfg.n_embd//cfg.n_head, cfg.block_size
        flops_per_token = 6*N + 12*L*H*Q*T
        flops_per_fwdbwd = flops_per_token * T
        flops_per_iter = flops_per_fwdbwd * fwdbwd_per_iter
        # express our flops throughput as ratio of A100 bfloat16 peak flops
        flops_achieved = flops_per_iter * (1.0/dt) # per second
        flops_promised = 312e12 # A100 GPU bfloat16 peak flops is 312 TFLOPS
        mfu = flops_achieved / flops_promised
        return mfu

    @torch.no_grad()
    def generate(self, idx, max_new_tokens, temperature=1.0, top_k=None):
        """
        Take a conditioning sequence of indices idx (LongTensor of shape (b,t)) and complete
        the sequence max_new_tokens times, feeding the predictions back into the model each time.
        Most likely you'll want to make sure to be in model.eval() mode of operation for this.
        """
        for _ in range(max_new_tokens):
            # if the sequence context is growing too long we must crop it at block_size
            idx_cond = idx if idx.size(1) <= self.config.block_size else idx[:, -self.config.block_size:]
            # forward the model to get the logits for the index in the sequence
            logits = self(idx_cond)
            # pluck the logits at the final step and scale by desired temperature
            logits = logits[:, -1, :] / temperature
            # optionally crop the logits to only the top k options
            if top_k is not None:
                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
                logits[logits < v[:, [-1]]] = -float('Inf')
            # apply softmax to convert logits to (normalized) probabilities
            probs = F.softmax(logits, dim=-1)
            # sample from the distribution
            idx_next = torch.multinomial(probs, num_samples=1)
            # append sampled index to the running sequence and continue
            idx = torch.cat((idx, idx_next), dim=1)

        return idx

    @staticmethod
    def from_config_file(config_file: Union[str, Path]):
        with open(config_file, 'r') as f:
            config_data = json.load(f)
        return GPT(ModelConfig(**config_data))
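
For orientation, a minimal sketch of instantiating the model class directly. The hyperparameters below are illustrative, not necessarily those of the shipped `ckpt.pt` (the handler reads the real ones from the checkpoint's `model_config`). Note that `block_size` should be a multiple of 20 so the `wpe` candle-position embedding covers the full context, and `vocab_size=257` matches the tokenizer (256 byte values plus `<BOS>`).

```python
import torch
from apogee.model import GPT, ModelConfig

# Illustrative configuration (not the released checkpoint's values).
config = ModelConfig(block_size=200, vocab_size=257, n_layer=3, n_head=6, n_embd=384)
model = GPT(config).eval()

idx = torch.randint(0, config.vocab_size, (1, config.block_size))  # random byte-level tokens
with torch.no_grad():
    logits = model(idx)                                  # (1, 200, 257): next-token logits at every position
    continued = model.generate(idx, max_new_tokens=20)   # append one candle's worth of sampled tokens
print(logits.shape, continued.shape)  # torch.Size([1, 200, 257]) torch.Size([1, 220])
```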
apogee/tokenizer.py ADDED
@@ -0,0 +1,41 @@
import sys
import torch

import numpy as np

from typing import Union

class Tokenizer:
    @property
    def vocabulary_size(self) -> int:
        """Return the size of the vocabulary"""
        return 257

    @property
    def tokens_per_candle(self) -> int:
        """Return the number of tokens per candle"""
        return 4 * 5

    def encode(self, candles: Union[np.ndarray, torch.Tensor]) -> torch.Tensor:
        """Tokenize candles into tokens."""
        if isinstance(candles, np.ndarray): # Wrap into a tensor
            candles = torch.tensor(candles)
        candles = (candles.view(torch.int32) << 1).view(torch.float32) # Erase the sign bit to fit the exponent into the first byte
        if sys.byteorder == 'little': # On little-endian systems, we need to byteswap the data so that the msb comes first
            candles.untyped_storage().byteswap(torch.float32)
        buffer = candles.view(torch.uint8) # Interpret the data as bytes ("tokenization" step)
        buffer = buffer.view(-1).to(torch.uint16) # Flatten the data and convert to uint16 because otherwise <BOS> would overflow
        buffer = torch.cat([torch.tensor([256], dtype=torch.uint16), buffer]) # Prepend the <BOS> (Begin of Series) token
        return buffer

    def decode(self, tokens: torch.Tensor) -> torch.Tensor:
        """Decode tokens into candles."""
        tokens = tokens.long()
        candles_tokens = tokens[..., 1:] # Remove the <BOS> token
        candles_tokens = candles_tokens.to(torch.uint8).view(*tokens.shape[:-1], -1, self.tokens_per_candle) # Convert back to uint8 and reshape
        candles_tokens = candles_tokens.view(torch.float32) # Interpret the data as floats
        if sys.byteorder == 'little': # On little-endian systems, we need to byteswap the data back
            # candles_tokens.untyped_storage().byteswap(torch.float32) # <-- This segfaults for some reason
            candles_tokens = candles_tokens.view(torch.uint8).view(*candles_tokens.shape, 4).flip(-1).view(torch.float32).squeeze(-1) # Workaround
        candles_tokens = -((candles_tokens.view(torch.int32) >> 1) | (1 << 31)).view(torch.float32) # Restore the sign bit
        return candles_tokens
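
To make the token layout concrete, here is a small sketch with made-up candle values: each candle is 5 float32 values (open, high, low, close, volume), i.e. 4 bytes per value and 20 byte-level tokens per candle, with a single `<BOS>` token (id 256) prepended to the whole series.

```python
import numpy as np
from apogee.tokenizer import Tokenizer

tokenizer = Tokenizer()

# Two made-up OHLCV candles, float32, shape (2, 5).
candles = np.array([
    [101.2, 103.5, 100.8, 102.9, 12.4],
    [102.9, 104.1, 101.7, 103.3,  9.8],
], dtype=np.float32)

tokens = tokenizer.encode(candles)
print(tokens.shape)      # torch.Size([41]) = 1 <BOS> token + 2 candles * 20 byte tokens
print(tokens[0].item())  # 256 (<BOS>); every other token is a byte value in [0, 255]
```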
assets/candles_binance.BTCUSDT_1m.png ADDED
assets/candles_binance.BTCUSDT_8h.png ADDED
assets/candles_binance.DOGEUSDT_2h.png ADDED
ckpt.pt ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cb44264a9b2d3215978459a20f6ac5b3ce56a6cf89ffb3b863ea1e7770c7563c
size 28918050
handler.py ADDED
@@ -0,0 +1,143 @@
from contextlib import nullcontext
import time
import torch
from apogee.tokenizer import Tokenizer
from apogee.model import GPT, ModelConfig

from typing import Any, Dict, Optional, Union
from pathlib import Path

torch.backends.cuda.matmul.allow_tf32 = True # allow tf32 on matmul
torch.backends.cudnn.allow_tf32 = True # allow tf32 on cudnn

class ApogeeHandler:
    """
    Inference handler: loads the Apogée checkpoint and samples future candlestick scenarios.
    """

    def __init__(self, base_path: Optional[Union[str, Path]] = None, device: Optional[str] = None):
        if base_path is None:
            base_path = Path(__file__).parent
        self.base_path = Path(base_path)
        # Get the device
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = device
        print(f"Handler spawned on device {self.device} 🚀")
        ckpt_path = self.base_path / "ckpt.pt"
        print(f"Loading model from {ckpt_path} 🤖")
        checkpoint = torch.load(ckpt_path, map_location=device)
        self.config = ModelConfig(**checkpoint["model_config"])
        self.tokenizer = Tokenizer()
        self.model = GPT(self.config)
        state_dict = checkpoint['model']
        unwanted_prefix = '_orig_mod.'
        for k in list(state_dict.keys()):
            if k.startswith(unwanted_prefix):
                state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
        self.model.load_state_dict(state_dict)
        self.model.eval()
        self.model.to(self.device)
        self.model = torch.compile(self.model)
        dtype = 'bfloat16' if torch.cuda.is_available() and torch.cuda.is_bf16_supported() else 'float16' # 'float32' or 'bfloat16' or 'float16'
        ptdtype = {'float32': torch.float32, 'bfloat16': torch.bfloat16, 'float16': torch.float16}[dtype]
        self.ctx = nullcontext() if device == 'cpu' else torch.amp.autocast(device_type=device, dtype=ptdtype)
        print("Warming up hardware 🔥")
        with torch.no_grad(), self.ctx:
            self.model(torch.randint(0, self.tokenizer.vocabulary_size, (1, self.config.block_size), device=self.device))
        print("Model ready! ✅")
        # Precompute useful values
        self.max_candles = self.config.block_size // self.tokenizer.tokens_per_candle

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Args:
            data (Dict[str, Any]):
                inputs: Dict[str, List[float]] with keys:
                    timestamps: Timestamps of the time series
                    open: Open prices
                    high: High prices
                    low: Low prices
                    close: Close prices
                    volume: Volumes
                steps: int = 4 | Number of future candles to sample
                n_scenarios: int = 32 | Number of scenarios to generate
                seed: Optional[int] = None | Seed for the random number generator
        Return:
            Dict[str, Any] Generated scenarios with keys:
                timestamps: Timestamps of the sampled future candles
                open: Open prices (one list per scenario)
                high: High prices (one list per scenario)
                low: Low prices (one list per scenario)
                close: Close prices (one list per scenario)
                volume: Volumes (one list per scenario)
        """
        t_start = time.time() # Start the timer
        # Unpack input data
        inputs = data.pop("inputs", data)
        # Validate the inputs
        assert "timestamps" in inputs and "open" in inputs and "high" in inputs and "low" in inputs and "close" in inputs and "volume" in inputs, "Required keys: timestamps, open, high, low, close, volume"
        assert isinstance(inputs["timestamps"], list) and isinstance(inputs["open"], list) and isinstance(inputs["high"], list) and isinstance(inputs["low"], list) and isinstance(inputs["close"], list) and isinstance(inputs["volume"], list), "Inputs must be lists"
        assert len(inputs["timestamps"]) == len(inputs["open"]) == len(inputs["high"]) == len(inputs["low"]) == len(inputs["close"]) == len(inputs["volume"]), "Inputs must have the same length"
        timestamps = torch.tensor(inputs["timestamps"])
        samples = torch.tensor([inputs["open"], inputs["high"], inputs["low"], inputs["close"], inputs["volume"]], dtype=torch.float32).T.contiguous()
        steps = data.pop("steps", 4)
        n_scenarios = data.pop("n_scenarios", 32)
        seed = data.pop("seed", None)
        # Validate the params
        assert isinstance(steps, int) and steps > 0, "steps must be a positive integer"
        assert isinstance(n_scenarios, int) and n_scenarios > 0, "n_scenarios must be a positive integer"
        if seed is not None:
            assert isinstance(seed, int), "seed must be an integer"
            torch.manual_seed(seed)
            torch.cuda.manual_seed(seed)
        # Generate scenarios
        samples = samples[-self.max_candles + steps:] # Keep only the last candles that fit in the model's context
        tokens = self.tokenizer.encode(samples) # Encode the samples into tokens
        tokens = tokens.to(self.device).unsqueeze(0).long() # Add a batch dimension
        with torch.no_grad(), self.ctx:
            for _ in range(steps * self.tokenizer.tokens_per_candle):
                assert tokens.shape[1] <= self.config.block_size, "Too many tokens in the sequence"
                logits = self.model(tokens) # forward the model to get the logits for the index in the sequence
                logits = logits[:, -1, :] # pluck the logits at the final step
                # apply softmax to convert logits to (normalized) probabilities
                probs = torch.nn.functional.softmax(logits, dim=-1)
                # sample from the distribution
                if probs.shape[0] != n_scenarios:
                    next_tokens = torch.multinomial(probs, num_samples=n_scenarios, replacement=True).T
                    tokens = tokens.expand(n_scenarios, -1)
                else:
                    next_tokens = torch.multinomial(probs, num_samples=1)
                # append sampled index to the running sequence and continue
                tokens = torch.cat((tokens, next_tokens), dim=1)
        # Decode the tokens back into samples
        scenarios = self.tokenizer.decode(tokens)[:, -steps:]
        print(f"Generated {n_scenarios} scenarios in {time.time() - t_start:.2f} seconds ⏱")
        return {
            "timestamps": (timestamps[-1] + torch.arange(1, steps+1) * torch.median(torch.diff(timestamps)).item()).tolist(),
            "open": scenarios[:, :, 0].tolist(),
            "high": scenarios[:, :, 1].tolist(),
            "low": scenarios[:, :, 2].tolist(),
            "close": scenarios[:, :, 3].tolist(),
            "volume": scenarios[:, :, 4].tolist()
        }

if __name__ == "__main__":
    import pandas as pd
    handler = ApogeeHandler()
    test_path = Path(__file__).parents[2] / "tests" / "assets" / "BTCUSDT-1m-2019-03.csv"
    with open(test_path, "r") as f:
        data = pd.read_csv(f)
    y = handler({
        "inputs": {
            "timestamps": data[data.columns[0]].tolist(),
            "open": data[data.columns[1]].tolist(),
            "high": data[data.columns[2]].tolist(),
            "low": data[data.columns[3]].tolist(),
            "close": data[data.columns[4]].tolist(),
            "volume": data[data.columns[5]].tolist()
        },
        "steps": 4,
        "n_scenarios": 64,
        "seed": 42
    })