Commit ·
9b43be1
1
Parent(s): 8341a12
Upload folder using huggingface_hub
Browse files- README.md +48 -0
- __init__.py +5 -0
- config.json +115 -0
- configuration_hawkes.py +4 -0
- model-checkpoint.pth +2 -2
- model.safetensors +3 -0
- model_architecture.txt +86 -0
- modeling_hawkes.py +4 -0
- pytorch_model.bin +3 -0
- train_parameters.yaml +198 -0
README.md
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- time-series
|
| 4 |
+
- temporal-point-processes
|
| 5 |
+
- hawkes-processes
|
| 6 |
+
- scientific-ml
|
| 7 |
+
license: mit
|
| 8 |
+
library_name: transformers
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# FIM-PP Model Card
|
| 12 |
+
|
| 13 |
+
`FIM-PP` is the Foundation Inference Model for marked temporal point processes.
|
| 14 |
+
It infers conditional intensity functions from a context set of event sequences and
|
| 15 |
+
supports zero-shot use as well as downstream fine-tuning.
|
| 16 |
+
|
| 17 |
+
## Loading
|
| 18 |
+
|
| 19 |
+
Install the `fim` package first, then load the model with Transformers:
|
| 20 |
+
|
| 21 |
+
```python
|
| 22 |
+
from transformers import AutoModel
|
| 23 |
+
|
| 24 |
+
model = AutoModel.from_pretrained("FIM4Science/FIM-PP", trust_remote_code=True)
|
| 25 |
+
model.eval()
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
## Notes
|
| 29 |
+
|
| 30 |
+
- The released checkpoint is configured for up to 22 event marks.
|
| 31 |
+
- The model expects Hawkes-style context and inference tensors as described in the
|
| 32 |
+
OpenFIM point-process tutorial.
|
| 33 |
+
- If needed, the lower-level fallback remains available through
|
| 34 |
+
`fim.models.hawkes.FIMHawkes.load_model(...)`.
|
| 35 |
+
|
| 36 |
+
## Reference
|
| 37 |
+
|
| 38 |
+
If you use this model, please cite:
|
| 39 |
+
|
| 40 |
+
```bibtex
|
| 41 |
+
@inproceedings{fim_pp,
|
| 42 |
+
title={In-Context Learning of Temporal Point Processes with Foundation Inference Models},
|
| 43 |
+
author={David Berghaus and Patrick Seifner and Kostadin Cvejoski and Cesar Ojeda and Ramses J. Sanchez},
|
| 44 |
+
booktitle={The Fourteenth International Conference on Learning Representations},
|
| 45 |
+
year={2026},
|
| 46 |
+
url={https://openreview.net/forum?id=h9HwUAODFP}
|
| 47 |
+
}
|
| 48 |
+
```
|
__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .configuration_hawkes import FIMHawkesConfig
|
| 2 |
+
from .modeling_hawkes import FIMHawkes
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
__all__ = ["FIMHawkes", "FIMHawkesConfig"]
|
config.json
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha_decoder": {
|
| 3 |
+
"hidden_act": {
|
| 4 |
+
"name": "torch.nn.GELU"
|
| 5 |
+
},
|
| 6 |
+
"hidden_layers": [
|
| 7 |
+
256,
|
| 8 |
+
256
|
| 9 |
+
],
|
| 10 |
+
"name": "fim.models.blocks.base.MLP"
|
| 11 |
+
},
|
| 12 |
+
"architectures": [
|
| 13 |
+
"FIMHawkes"
|
| 14 |
+
],
|
| 15 |
+
"auto_map": {
|
| 16 |
+
"AutoConfig": "configuration_hawkes.FIMHawkesConfig",
|
| 17 |
+
"AutoModel": "modeling_hawkes.FIMHawkes"
|
| 18 |
+
},
|
| 19 |
+
"beta_decoder": {
|
| 20 |
+
"hidden_act": {
|
| 21 |
+
"name": "torch.nn.GELU"
|
| 22 |
+
},
|
| 23 |
+
"hidden_layers": [
|
| 24 |
+
256,
|
| 25 |
+
256
|
| 26 |
+
],
|
| 27 |
+
"name": "fim.models.blocks.base.MLP"
|
| 28 |
+
},
|
| 29 |
+
"context_summary_encoder": {
|
| 30 |
+
"encoder_layer": {
|
| 31 |
+
"batch_first": true,
|
| 32 |
+
"dropout": 0.0,
|
| 33 |
+
"name": "torch.nn.TransformerEncoderLayer",
|
| 34 |
+
"nhead": 4
|
| 35 |
+
},
|
| 36 |
+
"name": "torch.nn.TransformerEncoder",
|
| 37 |
+
"num_layers": 2
|
| 38 |
+
},
|
| 39 |
+
"context_summary_pooling": {
|
| 40 |
+
"attention": {
|
| 41 |
+
"nhead": 4
|
| 42 |
+
},
|
| 43 |
+
"name": "fim.models.blocks.neural_operators.AttentionOperator",
|
| 44 |
+
"num_res_layers": 1,
|
| 45 |
+
"paths_block_attention": false
|
| 46 |
+
},
|
| 47 |
+
"context_ts_encoder": {
|
| 48 |
+
"encoder_layer": {
|
| 49 |
+
"batch_first": true,
|
| 50 |
+
"dropout": 0.0,
|
| 51 |
+
"name": "torch.nn.TransformerEncoderLayer",
|
| 52 |
+
"nhead": 4
|
| 53 |
+
},
|
| 54 |
+
"name": "torch.nn.TransformerEncoder",
|
| 55 |
+
"num_layers": 4
|
| 56 |
+
},
|
| 57 |
+
"decoder_ts": {
|
| 58 |
+
"decoder_layer": {
|
| 59 |
+
"batch_first": true,
|
| 60 |
+
"dropout": 0.0,
|
| 61 |
+
"name": "torch.nn.TransformerDecoderLayer",
|
| 62 |
+
"nhead": 4
|
| 63 |
+
},
|
| 64 |
+
"name": "torch.nn.TransformerDecoder",
|
| 65 |
+
"num_layers": 4
|
| 66 |
+
},
|
| 67 |
+
"delta_time_encoder": {
|
| 68 |
+
"name": "fim.models.blocks.positional_encodings.SineTimeEncoding",
|
| 69 |
+
"out_features": 256
|
| 70 |
+
},
|
| 71 |
+
"evaluation_mark_encoder": {
|
| 72 |
+
"name": "torch.nn.Linear"
|
| 73 |
+
},
|
| 74 |
+
"hidden_act": {
|
| 75 |
+
"name": "torch.nn.GELU"
|
| 76 |
+
},
|
| 77 |
+
"hidden_dim": 256,
|
| 78 |
+
"loss_weights": {
|
| 79 |
+
"alpha": 0.0,
|
| 80 |
+
"mu": 0.0,
|
| 81 |
+
"nll": 1.0,
|
| 82 |
+
"relative_spike": 0.0,
|
| 83 |
+
"smape": 0.0
|
| 84 |
+
},
|
| 85 |
+
"mark_encoder": {
|
| 86 |
+
"name": "torch.nn.Linear",
|
| 87 |
+
"out_features": 256
|
| 88 |
+
},
|
| 89 |
+
"mark_fusion_attention": null,
|
| 90 |
+
"max_num_marks": 22,
|
| 91 |
+
"mu_decoder": {
|
| 92 |
+
"hidden_act": {
|
| 93 |
+
"name": "torch.nn.GELU"
|
| 94 |
+
},
|
| 95 |
+
"hidden_layers": [
|
| 96 |
+
256,
|
| 97 |
+
256
|
| 98 |
+
],
|
| 99 |
+
"name": "fim.models.blocks.base.MLP"
|
| 100 |
+
},
|
| 101 |
+
"nll": {
|
| 102 |
+
"method": "monte_carlo",
|
| 103 |
+
"num_integration_points": 200
|
| 104 |
+
},
|
| 105 |
+
"normalize_by_max_time": false,
|
| 106 |
+
"normalize_times": true,
|
| 107 |
+
"thinning": null,
|
| 108 |
+
"time_encoder": {
|
| 109 |
+
"name": "fim.models.blocks.positional_encodings.SineTimeEncoding",
|
| 110 |
+
"out_features": 256
|
| 111 |
+
},
|
| 112 |
+
"torch_dtype": "float32",
|
| 113 |
+
"transformers_version": "4.46.0",
|
| 114 |
+
"model_type": "fimhawkes"
|
| 115 |
+
}
|
configuration_hawkes.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fim.models.hawkes import FIMHawkesConfig
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
__all__ = ["FIMHawkesConfig"]
|
model-checkpoint.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb9f416bc1059d11675a67d9693f7a094ebb4a8c0b6d1c3dfcc2a662280f2dca
|
| 3 |
+
size 64569191
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f6099a698340f62e0e61bccd1c3e8b4c37a36134927676b99259064d8686b0f
|
| 3 |
+
size 64527620
|
model_architecture.txt
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
===============================================================================================
|
| 2 |
+
Layer (type:depth-idx) Output Shape Param #
|
| 3 |
+
===============================================================================================
|
| 4 |
+
FIMHawkes -- 256
|
| 5 |
+
├─SineTimeEncoding: 1-1 [6, 1, 100, 256] --
|
| 6 |
+
│ └─Linear: 2-1 [6, 1, 100, 1] 2
|
| 7 |
+
│ └─Sequential: 2-2 [6, 1, 100, 255] --
|
| 8 |
+
│ │ └─Linear: 3-1 [6, 1, 100, 255] 510
|
| 9 |
+
│ │ └─SinActivation: 3-2 [6, 1, 100, 255] --
|
| 10 |
+
├─SineTimeEncoding: 1-2 [6, 1, 100, 256] --
|
| 11 |
+
│ └─Linear: 2-3 [6, 1, 100, 1] 2
|
| 12 |
+
│ └─Sequential: 2-4 [6, 1, 100, 255] --
|
| 13 |
+
│ │ └─Linear: 3-3 [6, 1, 100, 255] 510
|
| 14 |
+
│ │ └─SinActivation: 3-4 [6, 1, 100, 255] --
|
| 15 |
+
├─Linear: 1-3 [600, 256] 5,888
|
| 16 |
+
├─LayerNorm: 1-4 [6, 1, 100, 256] 512
|
| 17 |
+
├─SineTimeEncoding: 1-5 [6, 1999, 100, 256] (recursive)
|
| 18 |
+
│ └─Linear: 2-5 [6, 1999, 100, 1] (recursive)
|
| 19 |
+
│ └─Sequential: 2-6 [6, 1999, 100, 255] (recursive)
|
| 20 |
+
│ │ └─Linear: 3-5 [6, 1999, 100, 255] (recursive)
|
| 21 |
+
│ │ └─SinActivation: 3-6 [6, 1999, 100, 255] --
|
| 22 |
+
├─SineTimeEncoding: 1-6 [6, 1999, 100, 256] (recursive)
|
| 23 |
+
│ └─Linear: 2-7 [6, 1999, 100, 1] (recursive)
|
| 24 |
+
│ └─Sequential: 2-8 [6, 1999, 100, 255] (recursive)
|
| 25 |
+
│ │ └─Linear: 3-7 [6, 1999, 100, 255] (recursive)
|
| 26 |
+
│ │ └─SinActivation: 3-8 [6, 1999, 100, 255] --
|
| 27 |
+
├─Linear: 1-7 [1199400, 256] (recursive)
|
| 28 |
+
├─LayerNorm: 1-8 [6, 1999, 100, 256] (recursive)
|
| 29 |
+
├─TransformerEncoder: 1-9 [11994, 100, 256] --
|
| 30 |
+
│ └─ModuleList: 2-9 -- --
|
| 31 |
+
│ │ └─TransformerEncoderLayer: 3-9 [11994, 100, 256] 1,315,072
|
| 32 |
+
│ │ └─TransformerEncoderLayer: 3-10 [11994, 100, 256] 1,315,072
|
| 33 |
+
│ │ └─TransformerEncoderLayer: 3-11 [11994, 100, 256] 1,315,072
|
| 34 |
+
│ │ └─TransformerEncoderLayer: 3-12 [11994, 100, 256] 1,315,072
|
| 35 |
+
├─AttentionOperator: 1-10 [11994, 1, 256] --
|
| 36 |
+
│ └─ModuleList: 2-10 -- --
|
| 37 |
+
│ │ └─ResidualAttentionLayer: 3-13 [11994, 1, 256] 1,315,072
|
| 38 |
+
├─TransformerEncoder: 1-11 [6, 1999, 256] --
|
| 39 |
+
│ └─ModuleList: 2-11 -- --
|
| 40 |
+
│ │ └─TransformerEncoderLayer: 3-14 [6, 1999, 256] 1,315,072
|
| 41 |
+
│ │ └─TransformerEncoderLayer: 3-15 [6, 1999, 256] 1,315,072
|
| 42 |
+
├─TransformerDecoder: 1-12 [6, 100, 256] --
|
| 43 |
+
│ └─ModuleList: 2-12 -- --
|
| 44 |
+
│ │ └─TransformerDecoderLayer: 3-16 [6, 100, 256] 1,578,752
|
| 45 |
+
│ │ └─TransformerDecoderLayer: 3-17 [6, 100, 256] 1,578,752
|
| 46 |
+
│ │ └─TransformerDecoderLayer: 3-18 [6, 100, 256] 1,578,752
|
| 47 |
+
│ │ └─TransformerDecoderLayer: 3-19 [6, 100, 256] 1,578,752
|
| 48 |
+
├─Linear: 1-13 [1, 256] 5,888
|
| 49 |
+
├─MLP: 1-14 [600, 1] --
|
| 50 |
+
│ └─Sequential: 2-13 [600, 1] --
|
| 51 |
+
│ │ └─Linear: 3-20 [600, 256] 131,328
|
| 52 |
+
│ │ └─GELU: 3-21 [600, 256] --
|
| 53 |
+
│ │ └─Dropout: 3-22 [600, 256] --
|
| 54 |
+
│ │ └─Linear: 3-23 [600, 256] 65,792
|
| 55 |
+
│ │ └─GELU: 3-24 [600, 256] --
|
| 56 |
+
│ │ └─Dropout: 3-25 [600, 256] --
|
| 57 |
+
│ │ └─Linear: 3-26 [600, 1] 257
|
| 58 |
+
├─MLP: 1-15 [600, 1] --
|
| 59 |
+
│ └─Sequential: 2-14 [600, 1] --
|
| 60 |
+
│ │ └─Linear: 3-27 [600, 256] 131,328
|
| 61 |
+
│ │ └─GELU: 3-28 [600, 256] --
|
| 62 |
+
│ │ └─Dropout: 3-29 [600, 256] --
|
| 63 |
+
│ │ └─Linear: 3-30 [600, 256] 65,792
|
| 64 |
+
│ │ └─GELU: 3-31 [600, 256] --
|
| 65 |
+
│ │ └─Dropout: 3-32 [600, 256] --
|
| 66 |
+
│ │ └─Linear: 3-33 [600, 1] 257
|
| 67 |
+
├─MLP: 1-16 [600, 1] --
|
| 68 |
+
│ └─Sequential: 2-15 [600, 1] --
|
| 69 |
+
│ │ └─Linear: 3-34 [600, 256] 131,328
|
| 70 |
+
│ │ └─GELU: 3-35 [600, 256] --
|
| 71 |
+
│ │ └─Dropout: 3-36 [600, 256] --
|
| 72 |
+
│ │ └─Linear: 3-37 [600, 256] 65,792
|
| 73 |
+
│ │ └─GELU: 3-38 [600, 256] --
|
| 74 |
+
│ │ └─Dropout: 3-39 [600, 256] --
|
| 75 |
+
│ │ └─Linear: 3-40 [600, 1] 257
|
| 76 |
+
===============================================================================================
|
| 77 |
+
Total params: 16,126,211
|
| 78 |
+
Trainable params: 16,126,211
|
| 79 |
+
Non-trainable params: 0
|
| 80 |
+
Total mult-adds (Units.GIGABYTES): 70.54
|
| 81 |
+
===============================================================================================
|
| 82 |
+
Input size (MB): 28.96
|
| 83 |
+
Forward/backward pass size (MB): 118787.71
|
| 84 |
+
Params size (MB): 48.71
|
| 85 |
+
Estimated Total Size (MB): 118865.38
|
| 86 |
+
===============================================================================================
|
modeling_hawkes.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fim.models.hawkes import FIMHawkes
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
__all__ = ["FIMHawkes"]
|
pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c01bd55e9770f9df86a038c27b4997314f68a1b9abc565f8b983e171bbaf7fb6
|
| 3 |
+
size 64568539
|
train_parameters.yaml
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset:
|
| 2 |
+
dataset_kwargs:
|
| 3 |
+
field_name_for_dimension_grouping: base_intensity_functions
|
| 4 |
+
files_to_load:
|
| 5 |
+
base_intensity_functions: base_intensity_functions.pt
|
| 6 |
+
event_times: event_times.pt
|
| 7 |
+
event_types: event_types.pt
|
| 8 |
+
kernel_functions: kernel_functions.pt
|
| 9 |
+
time_offsets: time_offsets.pt
|
| 10 |
+
shuffle: true
|
| 11 |
+
loader_kwargs:
|
| 12 |
+
batch_size: 6
|
| 13 |
+
full_len_ratio: 0.1
|
| 14 |
+
max_number_of_minibatch_sizes: 8
|
| 15 |
+
max_path_count: 2000
|
| 16 |
+
max_sequence_len: 100
|
| 17 |
+
min_path_count: 400
|
| 18 |
+
min_sequence_len: 15
|
| 19 |
+
num_inference_paths: 1
|
| 20 |
+
num_inference_times: 2000
|
| 21 |
+
num_workers: 16
|
| 22 |
+
test_batch_size: 2
|
| 23 |
+
variable_num_of_paths: true
|
| 24 |
+
variable_sequence_lens:
|
| 25 |
+
train: true
|
| 26 |
+
validation: false
|
| 27 |
+
name: HawkesDataLoader
|
| 28 |
+
path:
|
| 29 |
+
train: !!python/tuple
|
| 30 |
+
- data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_exp_kernel/train
|
| 31 |
+
- data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_exp_kernel_no_interactions/train
|
| 32 |
+
- data/synthetic_data/hawkes/1k_1D_2k_paths_Gamma_base_exp_kernel/train
|
| 33 |
+
- data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_exp_kernel/train
|
| 34 |
+
- data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_exp_kernel_no_interactions/train
|
| 35 |
+
- data/synthetic_data/hawkes/1k_5D_2k_paths_Gamma_base_exp_kernel/train
|
| 36 |
+
- data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel/train
|
| 37 |
+
- data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel_no_interactions/train
|
| 38 |
+
- data/synthetic_data/hawkes/1k_10D_2k_paths_poisson/train
|
| 39 |
+
- data/synthetic_data/hawkes/1k_10D_2k_paths_Gamma_base_exp_kernel/train
|
| 40 |
+
- data/synthetic_data/hawkes/1k_10D_2k_paths_sin_base_exp_kernel/train
|
| 41 |
+
- data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel/train
|
| 42 |
+
- data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel_no_interactions/train
|
| 43 |
+
- data/synthetic_data/hawkes/1k_15D_2k_paths_poisson/train
|
| 44 |
+
- data/synthetic_data/hawkes/1k_15D_2k_paths_Gamma_base_exp_kernel/train
|
| 45 |
+
- data/synthetic_data/hawkes/1k_15D_2k_paths_sin_base_exp_kernel/train
|
| 46 |
+
- data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel/train
|
| 47 |
+
- data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_no_interactions/train
|
| 48 |
+
- data/synthetic_data/hawkes/5k_22D_2k_paths_poisson/train
|
| 49 |
+
- data/synthetic_data/hawkes/5k_22D_2k_paths_Gamma_base_exp_kernel/train
|
| 50 |
+
- data/synthetic_data/hawkes/5k_22D_2k_paths_sin_base_exp_kernel/train
|
| 51 |
+
- data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel_sparse/train
|
| 52 |
+
- data/synthetic_data/hawkes/1k_10D_2k_paths_Gamma_base_exp_kernel_sparse/train
|
| 53 |
+
- data/synthetic_data/hawkes/1k_10D_2k_paths_sin_base_exp_kernel_sparse/train
|
| 54 |
+
- data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel_sparse/train
|
| 55 |
+
- data/synthetic_data/hawkes/1k_15D_2k_paths_Gamma_base_exp_kernel_sparse/train
|
| 56 |
+
- data/synthetic_data/hawkes/1k_15D_2k_paths_sin_base_exp_kernel_sparse/train
|
| 57 |
+
- data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_sparse/train
|
| 58 |
+
- data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_rayleigh_kernel/train
|
| 59 |
+
- data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_rayleigh_kernel/train
|
| 60 |
+
- data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_rayleigh_kernel/train
|
| 61 |
+
- data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel/train
|
| 62 |
+
- data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_rayleigh_kernel_sparse/train
|
| 63 |
+
- data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_rayleigh_kernel_sparse/train
|
| 64 |
+
- data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_rayleigh_kernel_sparse/train
|
| 65 |
+
- data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel_sparse/train
|
| 66 |
+
validation: !!python/tuple
|
| 67 |
+
- data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel/val
|
| 68 |
+
- data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_no_interactions/val
|
| 69 |
+
- data/synthetic_data/hawkes/5k_22D_2k_paths_poisson/val
|
| 70 |
+
- data/synthetic_data/hawkes/5k_22D_2k_paths_Gamma_base_exp_kernel/val
|
| 71 |
+
- data/synthetic_data/hawkes/5k_22D_2k_paths_sin_base_exp_kernel/val
|
| 72 |
+
- data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel/val
|
| 73 |
+
- data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel_sparse/val
|
| 74 |
+
distributed:
|
| 75 |
+
activation_checkpoint: false
|
| 76 |
+
checkpoint_type: full_state
|
| 77 |
+
enabled: false
|
| 78 |
+
min_num_params: 1e5
|
| 79 |
+
sharding_strategy: NO_SHARD
|
| 80 |
+
wrap_policy: SIZE_BASED
|
| 81 |
+
experiment:
|
| 82 |
+
device_map: auto
|
| 83 |
+
name: FIM_Hawkes_10-22st_nll_mc_only_2000_paths_mixed_100_events_mixed-experiment-seed-10-dataset-dataset_kwargs-field_name_for_dimension_grouping-base_intensity_functions
|
| 84 |
+
name_add_date: true
|
| 85 |
+
seed: 10
|
| 86 |
+
model:
|
| 87 |
+
alpha_decoder:
|
| 88 |
+
hidden_act:
|
| 89 |
+
name: torch.nn.GELU
|
| 90 |
+
hidden_layers: !!python/tuple
|
| 91 |
+
- 256
|
| 92 |
+
- 256
|
| 93 |
+
name: fim.models.blocks.base.MLP
|
| 94 |
+
beta_decoder:
|
| 95 |
+
hidden_act:
|
| 96 |
+
name: torch.nn.GELU
|
| 97 |
+
hidden_layers: !!python/tuple
|
| 98 |
+
- 256
|
| 99 |
+
- 256
|
| 100 |
+
name: fim.models.blocks.base.MLP
|
| 101 |
+
context_summary_encoder:
|
| 102 |
+
encoder_layer:
|
| 103 |
+
batch_first: true
|
| 104 |
+
dropout: 0.0
|
| 105 |
+
name: torch.nn.TransformerEncoderLayer
|
| 106 |
+
nhead: 4
|
| 107 |
+
name: torch.nn.TransformerEncoder
|
| 108 |
+
num_layers: 2
|
| 109 |
+
context_summary_pooling:
|
| 110 |
+
attention:
|
| 111 |
+
nhead: 4
|
| 112 |
+
name: fim.models.blocks.neural_operators.AttentionOperator
|
| 113 |
+
num_res_layers: 1
|
| 114 |
+
paths_block_attention: false
|
| 115 |
+
context_ts_encoder:
|
| 116 |
+
encoder_layer:
|
| 117 |
+
batch_first: true
|
| 118 |
+
dropout: 0.0
|
| 119 |
+
name: torch.nn.TransformerEncoderLayer
|
| 120 |
+
nhead: 4
|
| 121 |
+
name: torch.nn.TransformerEncoder
|
| 122 |
+
num_layers: 4
|
| 123 |
+
decoder_ts:
|
| 124 |
+
decoder_layer:
|
| 125 |
+
batch_first: true
|
| 126 |
+
dropout: 0.0
|
| 127 |
+
name: torch.nn.TransformerDecoderLayer
|
| 128 |
+
nhead: 4
|
| 129 |
+
name: torch.nn.TransformerDecoder
|
| 130 |
+
num_layers: 4
|
| 131 |
+
delta_time_encoder:
|
| 132 |
+
name: fim.models.blocks.positional_encodings.SineTimeEncoding
|
| 133 |
+
out_features: 256
|
| 134 |
+
evaluation_mark_encoder:
|
| 135 |
+
name: torch.nn.Linear
|
| 136 |
+
hidden_act:
|
| 137 |
+
name: torch.nn.GELU
|
| 138 |
+
hidden_dim: 256
|
| 139 |
+
loss_weights:
|
| 140 |
+
alpha: 0.0
|
| 141 |
+
mu: 0.0
|
| 142 |
+
nll: 1.0
|
| 143 |
+
relative_spike: 0.0
|
| 144 |
+
smape: 0.0
|
| 145 |
+
mark_encoder:
|
| 146 |
+
name: torch.nn.Linear
|
| 147 |
+
out_features: 256
|
| 148 |
+
mark_fusion_attention: null
|
| 149 |
+
max_num_marks: 22
|
| 150 |
+
model_type: fimhawkes
|
| 151 |
+
mu_decoder:
|
| 152 |
+
hidden_act:
|
| 153 |
+
name: torch.nn.GELU
|
| 154 |
+
hidden_layers: !!python/tuple
|
| 155 |
+
- 256
|
| 156 |
+
- 256
|
| 157 |
+
name: fim.models.blocks.base.MLP
|
| 158 |
+
nll:
|
| 159 |
+
method: monte_carlo
|
| 160 |
+
num_integration_points: 200
|
| 161 |
+
normalize_by_max_time: false
|
| 162 |
+
normalize_times: true
|
| 163 |
+
thinning: null
|
| 164 |
+
time_encoder:
|
| 165 |
+
name: fim.models.blocks.positional_encodings.SineTimeEncoding
|
| 166 |
+
out_features: 256
|
| 167 |
+
optimizers: !!python/tuple
|
| 168 |
+
- optimizer_d:
|
| 169 |
+
lr: 5.0e-05
|
| 170 |
+
name: torch.optim.AdamW
|
| 171 |
+
weight_decay: 0.0001
|
| 172 |
+
trainer:
|
| 173 |
+
best_metric: loss
|
| 174 |
+
debug_iterations: null
|
| 175 |
+
detect_anomaly: false
|
| 176 |
+
epochs: 100000
|
| 177 |
+
evaluation_epoch:
|
| 178 |
+
enable_plotting: false
|
| 179 |
+
inference_path_idx: 0
|
| 180 |
+
iterator_name: validation
|
| 181 |
+
path: fim.trainers.evaluation_epochs.HawkesEvaluationPlots
|
| 182 |
+
plot_frequency: 10
|
| 183 |
+
experiment_dir: ./results/
|
| 184 |
+
gradient_accumulation_steps: 6
|
| 185 |
+
logging_format: RANK_%(rank)s - %(asctime)s - %(name)s - %(levelname)s - %(message)s
|
| 186 |
+
name: Trainer
|
| 187 |
+
precision: bf16_mixed
|
| 188 |
+
save_every: 1
|
| 189 |
+
schedulers: !!python/tuple
|
| 190 |
+
- beta: 1.0
|
| 191 |
+
label: gauss_nll
|
| 192 |
+
name: fim.utils.param_scheduler.ConstantScheduler
|
| 193 |
+
- beta: 1.0
|
| 194 |
+
label: init_cross_entropy
|
| 195 |
+
name: fim.utils.param_scheduler.ConstantScheduler
|
| 196 |
+
- beta: 1.0
|
| 197 |
+
label: missing_link
|
| 198 |
+
name: fim.utils.param_scheduler.ConstantScheduler
|