david-berghaus-fh committed on
Commit
9b43be1
·
1 Parent(s): 8341a12

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - time-series
4
+ - temporal-point-processes
5
+ - hawkes-processes
6
+ - scientific-ml
7
+ license: mit
8
+ library_name: transformers
9
+ ---
10
+
11
+ # FIM-PP Model Card
12
+
13
+ `FIM-PP` is the Foundation Inference Model for marked temporal point processes.
14
+ It infers conditional intensity functions from a context set of event sequences and
15
+ supports zero-shot use as well as downstream fine-tuning.
16
+
17
+ ## Loading
18
+
19
+ Install the `fim` package first, then load the model with Transformers:
20
+
21
+ ```python
22
+ from transformers import AutoModel
23
+
24
+ model = AutoModel.from_pretrained("FIM4Science/FIM-PP", trust_remote_code=True)
25
+ model.eval()
26
+ ```
27
+
28
+ ## Notes
29
+
30
+ - The released checkpoint is configured for up to 22 event marks.
31
+ - The model expects Hawkes-style context and inference tensors as described in the
32
+ OpenFIM point-process tutorial.
33
+ - If needed, the lower-level fallback remains available through
34
+ `fim.models.hawkes.FIMHawkes.load_model(...)`.
35
+
36
+ ## Reference
37
+
38
+ If you use this model, please cite:
39
+
40
+ ```bibtex
41
+ @inproceedings{fim_pp,
42
+ title={In-Context Learning of Temporal Point Processes with Foundation Inference Models},
43
+ author={David Berghaus and Patrick Seifner and Kostadin Cvejoski and Cesar Ojeda and Ramses J. Sanchez},
44
+ booktitle={The Fourteenth International Conference on Learning Representations},
45
+ year={2026},
46
+ url={https://openreview.net/forum?id=h9HwUAODFP}
47
+ }
48
+ ```
__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from .configuration_hawkes import FIMHawkesConfig
2
+ from .modeling_hawkes import FIMHawkes
3
+
4
+
5
+ __all__ = ["FIMHawkes", "FIMHawkesConfig"]
config.json ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_decoder": {
3
+ "hidden_act": {
4
+ "name": "torch.nn.GELU"
5
+ },
6
+ "hidden_layers": [
7
+ 256,
8
+ 256
9
+ ],
10
+ "name": "fim.models.blocks.base.MLP"
11
+ },
12
+ "architectures": [
13
+ "FIMHawkes"
14
+ ],
15
+ "auto_map": {
16
+ "AutoConfig": "configuration_hawkes.FIMHawkesConfig",
17
+ "AutoModel": "modeling_hawkes.FIMHawkes"
18
+ },
19
+ "beta_decoder": {
20
+ "hidden_act": {
21
+ "name": "torch.nn.GELU"
22
+ },
23
+ "hidden_layers": [
24
+ 256,
25
+ 256
26
+ ],
27
+ "name": "fim.models.blocks.base.MLP"
28
+ },
29
+ "context_summary_encoder": {
30
+ "encoder_layer": {
31
+ "batch_first": true,
32
+ "dropout": 0.0,
33
+ "name": "torch.nn.TransformerEncoderLayer",
34
+ "nhead": 4
35
+ },
36
+ "name": "torch.nn.TransformerEncoder",
37
+ "num_layers": 2
38
+ },
39
+ "context_summary_pooling": {
40
+ "attention": {
41
+ "nhead": 4
42
+ },
43
+ "name": "fim.models.blocks.neural_operators.AttentionOperator",
44
+ "num_res_layers": 1,
45
+ "paths_block_attention": false
46
+ },
47
+ "context_ts_encoder": {
48
+ "encoder_layer": {
49
+ "batch_first": true,
50
+ "dropout": 0.0,
51
+ "name": "torch.nn.TransformerEncoderLayer",
52
+ "nhead": 4
53
+ },
54
+ "name": "torch.nn.TransformerEncoder",
55
+ "num_layers": 4
56
+ },
57
+ "decoder_ts": {
58
+ "decoder_layer": {
59
+ "batch_first": true,
60
+ "dropout": 0.0,
61
+ "name": "torch.nn.TransformerDecoderLayer",
62
+ "nhead": 4
63
+ },
64
+ "name": "torch.nn.TransformerDecoder",
65
+ "num_layers": 4
66
+ },
67
+ "delta_time_encoder": {
68
+ "name": "fim.models.blocks.positional_encodings.SineTimeEncoding",
69
+ "out_features": 256
70
+ },
71
+ "evaluation_mark_encoder": {
72
+ "name": "torch.nn.Linear"
73
+ },
74
+ "hidden_act": {
75
+ "name": "torch.nn.GELU"
76
+ },
77
+ "hidden_dim": 256,
78
+ "loss_weights": {
79
+ "alpha": 0.0,
80
+ "mu": 0.0,
81
+ "nll": 1.0,
82
+ "relative_spike": 0.0,
83
+ "smape": 0.0
84
+ },
85
+ "mark_encoder": {
86
+ "name": "torch.nn.Linear",
87
+ "out_features": 256
88
+ },
89
+ "mark_fusion_attention": null,
90
+ "max_num_marks": 22,
91
+ "mu_decoder": {
92
+ "hidden_act": {
93
+ "name": "torch.nn.GELU"
94
+ },
95
+ "hidden_layers": [
96
+ 256,
97
+ 256
98
+ ],
99
+ "name": "fim.models.blocks.base.MLP"
100
+ },
101
+ "nll": {
102
+ "method": "monte_carlo",
103
+ "num_integration_points": 200
104
+ },
105
+ "normalize_by_max_time": false,
106
+ "normalize_times": true,
107
+ "thinning": null,
108
+ "time_encoder": {
109
+ "name": "fim.models.blocks.positional_encodings.SineTimeEncoding",
110
+ "out_features": 256
111
+ },
112
+ "torch_dtype": "float32",
113
+ "transformers_version": "4.46.0",
114
+ "model_type": "fimhawkes"
115
+ }
configuration_hawkes.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from fim.models.hawkes import FIMHawkesConfig
2
+
3
+
4
+ __all__ = ["FIMHawkesConfig"]
model-checkpoint.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca6607f15650cc0eb430d6a663ff539cd785640cf4db412ae09bcf63991a3ebd
3
- size 64581256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb9f416bc1059d11675a67d9693f7a094ebb4a8c0b6d1c3dfcc2a662280f2dca
3
+ size 64569191
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f6099a698340f62e0e61bccd1c3e8b4c37a36134927676b99259064d8686b0f
3
+ size 64527620
model_architecture.txt ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ===============================================================================================
2
+ Layer (type:depth-idx) Output Shape Param #
3
+ ===============================================================================================
4
+ FIMHawkes -- 256
5
+ ├─SineTimeEncoding: 1-1 [6, 1, 100, 256] --
6
+ │ └─Linear: 2-1 [6, 1, 100, 1] 2
7
+ │ └─Sequential: 2-2 [6, 1, 100, 255] --
8
+ │ │ └─Linear: 3-1 [6, 1, 100, 255] 510
9
+ │ │ └─SinActivation: 3-2 [6, 1, 100, 255] --
10
+ ├─SineTimeEncoding: 1-2 [6, 1, 100, 256] --
11
+ │ └─Linear: 2-3 [6, 1, 100, 1] 2
12
+ │ └─Sequential: 2-4 [6, 1, 100, 255] --
13
+ │ │ └─Linear: 3-3 [6, 1, 100, 255] 510
14
+ │ │ └─SinActivation: 3-4 [6, 1, 100, 255] --
15
+ ├─Linear: 1-3 [600, 256] 5,888
16
+ ├─LayerNorm: 1-4 [6, 1, 100, 256] 512
17
+ ├─SineTimeEncoding: 1-5 [6, 1999, 100, 256] (recursive)
18
+ │ └─Linear: 2-5 [6, 1999, 100, 1] (recursive)
19
+ │ └─Sequential: 2-6 [6, 1999, 100, 255] (recursive)
20
+ │ │ └─Linear: 3-5 [6, 1999, 100, 255] (recursive)
21
+ │ │ └─SinActivation: 3-6 [6, 1999, 100, 255] --
22
+ ├─SineTimeEncoding: 1-6 [6, 1999, 100, 256] (recursive)
23
+ │ └─Linear: 2-7 [6, 1999, 100, 1] (recursive)
24
+ │ └─Sequential: 2-8 [6, 1999, 100, 255] (recursive)
25
+ │ │ └─Linear: 3-7 [6, 1999, 100, 255] (recursive)
26
+ │ │ └─SinActivation: 3-8 [6, 1999, 100, 255] --
27
+ ├─Linear: 1-7 [1199400, 256] (recursive)
28
+ ├─LayerNorm: 1-8 [6, 1999, 100, 256] (recursive)
29
+ ├─TransformerEncoder: 1-9 [11994, 100, 256] --
30
+ │ └─ModuleList: 2-9 -- --
31
+ │ │ └─TransformerEncoderLayer: 3-9 [11994, 100, 256] 1,315,072
32
+ │ │ └─TransformerEncoderLayer: 3-10 [11994, 100, 256] 1,315,072
33
+ │ │ └─TransformerEncoderLayer: 3-11 [11994, 100, 256] 1,315,072
34
+ │ │ └─TransformerEncoderLayer: 3-12 [11994, 100, 256] 1,315,072
35
+ ├─AttentionOperator: 1-10 [11994, 1, 256] --
36
+ │ └─ModuleList: 2-10 -- --
37
+ │ │ └─ResidualAttentionLayer: 3-13 [11994, 1, 256] 1,315,072
38
+ ├─TransformerEncoder: 1-11 [6, 1999, 256] --
39
+ │ └─ModuleList: 2-11 -- --
40
+ │ │ └─TransformerEncoderLayer: 3-14 [6, 1999, 256] 1,315,072
41
+ │ │ └─TransformerEncoderLayer: 3-15 [6, 1999, 256] 1,315,072
42
+ ├─TransformerDecoder: 1-12 [6, 100, 256] --
43
+ │ └─ModuleList: 2-12 -- --
44
+ │ │ └─TransformerDecoderLayer: 3-16 [6, 100, 256] 1,578,752
45
+ │ │ └─TransformerDecoderLayer: 3-17 [6, 100, 256] 1,578,752
46
+ │ │ └─TransformerDecoderLayer: 3-18 [6, 100, 256] 1,578,752
47
+ │ │ └─TransformerDecoderLayer: 3-19 [6, 100, 256] 1,578,752
48
+ ├─Linear: 1-13 [1, 256] 5,888
49
+ ├─MLP: 1-14 [600, 1] --
50
+ │ └─Sequential: 2-13 [600, 1] --
51
+ │ │ └─Linear: 3-20 [600, 256] 131,328
52
+ │ │ └─GELU: 3-21 [600, 256] --
53
+ │ │ └─Dropout: 3-22 [600, 256] --
54
+ │ │ └─Linear: 3-23 [600, 256] 65,792
55
+ │ │ └─GELU: 3-24 [600, 256] --
56
+ │ │ └─Dropout: 3-25 [600, 256] --
57
+ │ │ └─Linear: 3-26 [600, 1] 257
58
+ ├─MLP: 1-15 [600, 1] --
59
+ │ └─Sequential: 2-14 [600, 1] --
60
+ │ │ └─Linear: 3-27 [600, 256] 131,328
61
+ │ │ └─GELU: 3-28 [600, 256] --
62
+ │ │ └─Dropout: 3-29 [600, 256] --
63
+ │ │ └─Linear: 3-30 [600, 256] 65,792
64
+ │ │ └─GELU: 3-31 [600, 256] --
65
+ │ │ └─Dropout: 3-32 [600, 256] --
66
+ │ │ └─Linear: 3-33 [600, 1] 257
67
+ ├─MLP: 1-16 [600, 1] --
68
+ │ └─Sequential: 2-15 [600, 1] --
69
+ │ │ └─Linear: 3-34 [600, 256] 131,328
70
+ │ │ └─GELU: 3-35 [600, 256] --
71
+ │ │ └─Dropout: 3-36 [600, 256] --
72
+ │ │ └─Linear: 3-37 [600, 256] 65,792
73
+ │ │ └─GELU: 3-38 [600, 256] --
74
+ │ │ └─Dropout: 3-39 [600, 256] --
75
+ │ │ └─Linear: 3-40 [600, 1] 257
76
+ ===============================================================================================
77
+ Total params: 16,126,211
78
+ Trainable params: 16,126,211
79
+ Non-trainable params: 0
80
+ Total mult-adds (Units.GIGABYTES): 70.54
81
+ ===============================================================================================
82
+ Input size (MB): 28.96
83
+ Forward/backward pass size (MB): 118787.71
84
+ Params size (MB): 48.71
85
+ Estimated Total Size (MB): 118865.38
86
+ ===============================================================================================
modeling_hawkes.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from fim.models.hawkes import FIMHawkes
2
+
3
+
4
+ __all__ = ["FIMHawkes"]
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c01bd55e9770f9df86a038c27b4997314f68a1b9abc565f8b983e171bbaf7fb6
3
+ size 64568539
train_parameters.yaml ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset:
2
+ dataset_kwargs:
3
+ field_name_for_dimension_grouping: base_intensity_functions
4
+ files_to_load:
5
+ base_intensity_functions: base_intensity_functions.pt
6
+ event_times: event_times.pt
7
+ event_types: event_types.pt
8
+ kernel_functions: kernel_functions.pt
9
+ time_offsets: time_offsets.pt
10
+ shuffle: true
11
+ loader_kwargs:
12
+ batch_size: 6
13
+ full_len_ratio: 0.1
14
+ max_number_of_minibatch_sizes: 8
15
+ max_path_count: 2000
16
+ max_sequence_len: 100
17
+ min_path_count: 400
18
+ min_sequence_len: 15
19
+ num_inference_paths: 1
20
+ num_inference_times: 2000
21
+ num_workers: 16
22
+ test_batch_size: 2
23
+ variable_num_of_paths: true
24
+ variable_sequence_lens:
25
+ train: true
26
+ validation: false
27
+ name: HawkesDataLoader
28
+ path:
29
+ train: !!python/tuple
30
+ - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_exp_kernel/train
31
+ - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_exp_kernel_no_interactions/train
32
+ - data/synthetic_data/hawkes/1k_1D_2k_paths_Gamma_base_exp_kernel/train
33
+ - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_exp_kernel/train
34
+ - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_exp_kernel_no_interactions/train
35
+ - data/synthetic_data/hawkes/1k_5D_2k_paths_Gamma_base_exp_kernel/train
36
+ - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel/train
37
+ - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel_no_interactions/train
38
+ - data/synthetic_data/hawkes/1k_10D_2k_paths_poisson/train
39
+ - data/synthetic_data/hawkes/1k_10D_2k_paths_Gamma_base_exp_kernel/train
40
+ - data/synthetic_data/hawkes/1k_10D_2k_paths_sin_base_exp_kernel/train
41
+ - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel/train
42
+ - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel_no_interactions/train
43
+ - data/synthetic_data/hawkes/1k_15D_2k_paths_poisson/train
44
+ - data/synthetic_data/hawkes/1k_15D_2k_paths_Gamma_base_exp_kernel/train
45
+ - data/synthetic_data/hawkes/1k_15D_2k_paths_sin_base_exp_kernel/train
46
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel/train
47
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_no_interactions/train
48
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_poisson/train
49
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_Gamma_base_exp_kernel/train
50
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_sin_base_exp_kernel/train
51
+ - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel_sparse/train
52
+ - data/synthetic_data/hawkes/1k_10D_2k_paths_Gamma_base_exp_kernel_sparse/train
53
+ - data/synthetic_data/hawkes/1k_10D_2k_paths_sin_base_exp_kernel_sparse/train
54
+ - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel_sparse/train
55
+ - data/synthetic_data/hawkes/1k_15D_2k_paths_Gamma_base_exp_kernel_sparse/train
56
+ - data/synthetic_data/hawkes/1k_15D_2k_paths_sin_base_exp_kernel_sparse/train
57
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_sparse/train
58
+ - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_rayleigh_kernel/train
59
+ - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_rayleigh_kernel/train
60
+ - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_rayleigh_kernel/train
61
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel/train
62
+ - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_rayleigh_kernel_sparse/train
63
+ - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_rayleigh_kernel_sparse/train
64
+ - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_rayleigh_kernel_sparse/train
65
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel_sparse/train
66
+ validation: !!python/tuple
67
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel/val
68
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_no_interactions/val
69
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_poisson/val
70
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_Gamma_base_exp_kernel/val
71
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_sin_base_exp_kernel/val
72
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel/val
73
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel_sparse/val
74
+ distributed:
75
+ activation_chekpoint: false
76
+ checkpoint_type: full_state
77
+ enabled: false
78
+ min_num_params: 1e5
79
+ sharding_strategy: NO_SHARD
80
+ wrap_policy: SIZE_BAZED
81
+ experiment:
82
+ device_map: auto
83
+ name: FIM_Hawkes_10-22st_nll_mc_only_2000_paths_mixed_100_events_mixed-experiment-seed-10-dataset-dataset_kwargs-field_name_for_dimension_grouping-base_intensity_functions
84
+ name_add_date: true
85
+ seed: 10
86
+ model:
87
+ alpha_decoder:
88
+ hidden_act:
89
+ name: torch.nn.GELU
90
+ hidden_layers: !!python/tuple
91
+ - 256
92
+ - 256
93
+ name: fim.models.blocks.base.MLP
94
+ beta_decoder:
95
+ hidden_act:
96
+ name: torch.nn.GELU
97
+ hidden_layers: !!python/tuple
98
+ - 256
99
+ - 256
100
+ name: fim.models.blocks.base.MLP
101
+ context_summary_encoder:
102
+ encoder_layer:
103
+ batch_first: true
104
+ dropout: 0.0
105
+ name: torch.nn.TransformerEncoderLayer
106
+ nhead: 4
107
+ name: torch.nn.TransformerEncoder
108
+ num_layers: 2
109
+ context_summary_pooling:
110
+ attention:
111
+ nhead: 4
112
+ name: fim.models.blocks.neural_operators.AttentionOperator
113
+ num_res_layers: 1
114
+ paths_block_attention: false
115
+ context_ts_encoder:
116
+ encoder_layer:
117
+ batch_first: true
118
+ dropout: 0.0
119
+ name: torch.nn.TransformerEncoderLayer
120
+ nhead: 4
121
+ name: torch.nn.TransformerEncoder
122
+ num_layers: 4
123
+ decoder_ts:
124
+ decoder_layer:
125
+ batch_first: true
126
+ dropout: 0.0
127
+ name: torch.nn.TransformerDecoderLayer
128
+ nhead: 4
129
+ name: torch.nn.TransformerDecoder
130
+ num_layers: 4
131
+ delta_time_encoder:
132
+ name: fim.models.blocks.positional_encodings.SineTimeEncoding
133
+ out_features: 256
134
+ evaluation_mark_encoder:
135
+ name: torch.nn.Linear
136
+ hidden_act:
137
+ name: torch.nn.GELU
138
+ hidden_dim: 256
139
+ loss_weights:
140
+ alpha: 0.0
141
+ mu: 0.0
142
+ nll: 1.0
143
+ relative_spike: 0.0
144
+ smape: 0.0
145
+ mark_encoder:
146
+ name: torch.nn.Linear
147
+ out_features: 256
148
+ mark_fusion_attention: null
149
+ max_num_marks: 22
150
+ model_type: fimhawkes
151
+ mu_decoder:
152
+ hidden_act:
153
+ name: torch.nn.GELU
154
+ hidden_layers: !!python/tuple
155
+ - 256
156
+ - 256
157
+ name: fim.models.blocks.base.MLP
158
+ nll:
159
+ method: monte_carlo
160
+ num_integration_points: 200
161
+ normalize_by_max_time: false
162
+ normalize_times: true
163
+ thinning: null
164
+ time_encoder:
165
+ name: fim.models.blocks.positional_encodings.SineTimeEncoding
166
+ out_features: 256
167
+ optimizers: !!python/tuple
168
+ - optimizer_d:
169
+ lr: 5.0e-05
170
+ name: torch.optim.AdamW
171
+ weight_decay: 0.0001
172
+ trainer:
173
+ best_metric: loss
174
+ debug_iterations: null
175
+ detect_anomaly: false
176
+ epochs: 100000
177
+ evaluation_epoch:
178
+ enable_plotting: false
179
+ inference_path_idx: 0
180
+ iterator_name: validation
181
+ path: fim.trainers.evaluation_epochs.HawkesEvaluationPlots
182
+ plot_frequency: 10
183
+ experiment_dir: ./results/
184
+ gradient_accumulation_steps: 6
185
+ logging_format: RANK_%(rank)s - %(asctime)s - %(name)s - %(levelname)s - %(message)s
186
+ name: Trainer
187
+ precision: bf16_mixed
188
+ save_every: 1
189
+ schedulers: !!python/tuple
190
+ - beta: 1.0
191
+ label: gauss_nll
192
+ name: fim.utils.param_scheduler.ConstantScheduler
193
+ - beta: 1.0
194
+ label: init_cross_entropy
195
+ name: fim.utils.param_scheduler.ConstantScheduler
196
+ - beta: 1.0
197
+ label: missing_link
198
+ name: fim.utils.param_scheduler.ConstantScheduler