david-berghaus-fh committed on
Commit
9b43be1
·
1 Parent(s): 8341a12

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - time-series
4
+ - temporal-point-processes
5
+ - hawkes-processes
6
+ - scientific-ml
7
+ license: mit
8
+ library_name: transformers
9
+ ---
10
+
11
+ # FIM-PP Model Card
12
+
13
+ `FIM-PP` is the Foundation Inference Model for marked temporal point processes.
14
+ It infers conditional intensity functions from a context set of event sequences and
15
+ supports zero-shot use as well as downstream fine-tuning.
16
+
17
+ ## Loading
18
+
19
+ Install the `fim` package first, then load the model with Transformers:
20
+
21
+ ```python
22
+ from transformers import AutoModel
23
+
24
+ model = AutoModel.from_pretrained("FIM4Science/FIM-PP", trust_remote_code=True)
25
+ model.eval()
26
+ ```
27
+
28
+ ## Notes
29
+
30
+ - The released checkpoint is configured for up to 22 event marks.
31
+ - The model expects Hawkes-style context and inference tensors as described in the
32
+ OpenFIM point-process tutorial.
33
+ - If needed, the lower-level fallback remains available through
34
+ `fim.models.hawkes.FIMHawkes.load_model(...)`.
35
+
36
+ ## Reference
37
+
38
+ If you use this model, please cite:
39
+
40
+ ```bibtex
41
+ @inproceedings{fim_pp,
42
+ title={In-Context Learning of Temporal Point Processes with Foundation Inference Models},
43
+ author={David Berghaus and Patrick Seifner and Kostadin Cvejoski and Cesar Ojeda and Ramses J. Sanchez},
44
+ booktitle={The Fourteenth International Conference on Learning Representations},
45
+ year={2026},
46
+ url={https://openreview.net/forum?id=h9HwUAODFP}
47
+ }
48
+ ```
__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from .configuration_hawkes import FIMHawkesConfig
2
+ from .modeling_hawkes import FIMHawkes
3
+
4
+
5
+ __all__ = ["FIMHawkes", "FIMHawkesConfig"]
config.json ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_decoder": {
3
+ "hidden_act": {
4
+ "name": "torch.nn.GELU"
5
+ },
6
+ "hidden_layers": [
7
+ 256,
8
+ 256
9
+ ],
10
+ "name": "fim.models.blocks.base.MLP"
11
+ },
12
+ "architectures": [
13
+ "FIMHawkes"
14
+ ],
15
+ "auto_map": {
16
+ "AutoConfig": "configuration_hawkes.FIMHawkesConfig",
17
+ "AutoModel": "modeling_hawkes.FIMHawkes"
18
+ },
19
+ "beta_decoder": {
20
+ "hidden_act": {
21
+ "name": "torch.nn.GELU"
22
+ },
23
+ "hidden_layers": [
24
+ 256,
25
+ 256
26
+ ],
27
+ "name": "fim.models.blocks.base.MLP"
28
+ },
29
+ "context_summary_encoder": {
30
+ "encoder_layer": {
31
+ "batch_first": true,
32
+ "dropout": 0.0,
33
+ "name": "torch.nn.TransformerEncoderLayer",
34
+ "nhead": 4
35
+ },
36
+ "name": "torch.nn.TransformerEncoder",
37
+ "num_layers": 2
38
+ },
39
+ "context_summary_pooling": {
40
+ "attention": {
41
+ "nhead": 4
42
+ },
43
+ "name": "fim.models.blocks.neural_operators.AttentionOperator",
44
+ "num_res_layers": 1,
45
+ "paths_block_attention": false
46
+ },
47
+ "context_ts_encoder": {
48
+ "encoder_layer": {
49
+ "batch_first": true,
50
+ "dropout": 0.0,
51
+ "name": "torch.nn.TransformerEncoderLayer",
52
+ "nhead": 4
53
+ },
54
+ "name": "torch.nn.TransformerEncoder",
55
+ "num_layers": 4
56
+ },
57
+ "decoder_ts": {
58
+ "decoder_layer": {
59
+ "batch_first": true,
60
+ "dropout": 0.0,
61
+ "name": "torch.nn.TransformerDecoderLayer",
62
+ "nhead": 4
63
+ },
64
+ "name": "torch.nn.TransformerDecoder",
65
+ "num_layers": 4
66
+ },
67
+ "delta_time_encoder": {
68
+ "name": "fim.models.blocks.positional_encodings.SineTimeEncoding",
69
+ "out_features": 256
70
+ },
71
+ "evaluation_mark_encoder": {
72
+ "name": "torch.nn.Linear"
73
+ },
74
+ "hidden_act": {
75
+ "name": "torch.nn.GELU"
76
+ },
77
+ "hidden_dim": 256,
78
+ "loss_weights": {
79
+ "alpha": 0.0,
80
+ "mu": 0.0,
81
+ "nll": 1.0,
82
+ "relative_spike": 0.0,
83
+ "smape": 0.0
84
+ },
85
+ "mark_encoder": {
86
+ "name": "torch.nn.Linear",
87
+ "out_features": 256
88
+ },
89
+ "mark_fusion_attention": null,
90
+ "max_num_marks": 22,
91
+ "mu_decoder": {
92
+ "hidden_act": {
93
+ "name": "torch.nn.GELU"
94
+ },
95
+ "hidden_layers": [
96
+ 256,
97
+ 256
98
+ ],
99
+ "name": "fim.models.blocks.base.MLP"
100
+ },
101
+ "nll": {
102
+ "method": "monte_carlo",
103
+ "num_integration_points": 200
104
+ },
105
+ "normalize_by_max_time": false,
106
+ "normalize_times": true,
107
+ "thinning": null,
108
+ "time_encoder": {
109
+ "name": "fim.models.blocks.positional_encodings.SineTimeEncoding",
110
+ "out_features": 256
111
+ },
112
+ "torch_dtype": "float32",
113
+ "transformers_version": "4.46.0",
114
+ "model_type": "fimhawkes"
115
+ }
configuration_hawkes.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from fim.models.hawkes import FIMHawkesConfig
2
+
3
+
4
+ __all__ = ["FIMHawkesConfig"]
model-checkpoint.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca6607f15650cc0eb430d6a663ff539cd785640cf4db412ae09bcf63991a3ebd
3
- size 64581256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb9f416bc1059d11675a67d9693f7a094ebb4a8c0b6d1c3dfcc2a662280f2dca
3
+ size 64569191
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f6099a698340f62e0e61bccd1c3e8b4c37a36134927676b99259064d8686b0f
3
+ size 64527620
model_architecture.txt ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ===============================================================================================
2
+ Layer (type:depth-idx) Output Shape Param #
3
+ ===============================================================================================
4
+ FIMHawkes -- 256
5
+ ├─SineTimeEncoding: 1-1 [6, 1, 100, 256] --
6
+ │ └─Linear: 2-1 [6, 1, 100, 1] 2
7
+ │ └─Sequential: 2-2 [6, 1, 100, 255] --
8
+ │ │ └─Linear: 3-1 [6, 1, 100, 255] 510
9
+ │ │ └─SinActivation: 3-2 [6, 1, 100, 255] --
10
+ ├─SineTimeEncoding: 1-2 [6, 1, 100, 256] --
11
+ │ └─Linear: 2-3 [6, 1, 100, 1] 2
12
+ │ └─Sequential: 2-4 [6, 1, 100, 255] --
13
+ │ │ └─Linear: 3-3 [6, 1, 100, 255] 510
14
+ │ │ └─SinActivation: 3-4 [6, 1, 100, 255] --
15
+ ├─Linear: 1-3 [600, 256] 5,888
16
+ ├─LayerNorm: 1-4 [6, 1, 100, 256] 512
17
+ ├─SineTimeEncoding: 1-5 [6, 1999, 100, 256] (recursive)
18
+ │ └─Linear: 2-5 [6, 1999, 100, 1] (recursive)
19
+ │ └─Sequential: 2-6 [6, 1999, 100, 255] (recursive)
20
+ │ │ └─Linear: 3-5 [6, 1999, 100, 255] (recursive)
21
+ │ │ └─SinActivation: 3-6 [6, 1999, 100, 255] --
22
+ ├─SineTimeEncoding: 1-6 [6, 1999, 100, 256] (recursive)
23
+ │ └─Linear: 2-7 [6, 1999, 100, 1] (recursive)
24
+ │ └─Sequential: 2-8 [6, 1999, 100, 255] (recursive)
25
+ │ │ └─Linear: 3-7 [6, 1999, 100, 255] (recursive)
26
+ │ │ └─SinActivation: 3-8 [6, 1999, 100, 255] --
27
+ ├─Linear: 1-7 [1199400, 256] (recursive)
28
+ ├─LayerNorm: 1-8 [6, 1999, 100, 256] (recursive)
29
+ ├─TransformerEncoder: 1-9 [11994, 100, 256] --
30
+ │ └─ModuleList: 2-9 -- --
31
+ │ │ └─TransformerEncoderLayer: 3-9 [11994, 100, 256] 1,315,072
32
+ │ │ └─TransformerEncoderLayer: 3-10 [11994, 100, 256] 1,315,072
33
+ │ │ └─TransformerEncoderLayer: 3-11 [11994, 100, 256] 1,315,072
34
+ │ │ └─TransformerEncoderLayer: 3-12 [11994, 100, 256] 1,315,072
35
+ ├─AttentionOperator: 1-10 [11994, 1, 256] --
36
+ │ └─ModuleList: 2-10 -- --
37
+ │ │ └─ResidualAttentionLayer: 3-13 [11994, 1, 256] 1,315,072
38
+ ├─TransformerEncoder: 1-11 [6, 1999, 256] --
39
+ │ └─ModuleList: 2-11 -- --
40
+ │ │ └─TransformerEncoderLayer: 3-14 [6, 1999, 256] 1,315,072
41
+ │ │ └─TransformerEncoderLayer: 3-15 [6, 1999, 256] 1,315,072
42
+ ├─TransformerDecoder: 1-12 [6, 100, 256] --
43
+ │ └─ModuleList: 2-12 -- --
44
+ │ │ └─TransformerDecoderLayer: 3-16 [6, 100, 256] 1,578,752
45
+ │ │ └─TransformerDecoderLayer: 3-17 [6, 100, 256] 1,578,752
46
+ │ │ └─TransformerDecoderLayer: 3-18 [6, 100, 256] 1,578,752
47
+ │ │ └─TransformerDecoderLayer: 3-19 [6, 100, 256] 1,578,752
48
+ ├─Linear: 1-13 [1, 256] 5,888
49
+ ├─MLP: 1-14 [600, 1] --
50
+ │ └─Sequential: 2-13 [600, 1] --
51
+ │ │ └─Linear: 3-20 [600, 256] 131,328
52
+ │ │ └─GELU: 3-21 [600, 256] --
53
+ │ │ └─Dropout: 3-22 [600, 256] --
54
+ │ │ └─Linear: 3-23 [600, 256] 65,792
55
+ │ │ └─GELU: 3-24 [600, 256] --
56
+ │ │ └─Dropout: 3-25 [600, 256] --
57
+ │ │ └─Linear: 3-26 [600, 1] 257
58
+ ├─MLP: 1-15 [600, 1] --
59
+ │ └─Sequential: 2-14 [600, 1] --
60
+ │ │ └─Linear: 3-27 [600, 256] 131,328
61
+ │ │ └─GELU: 3-28 [600, 256] --
62
+ │ │ └─Dropout: 3-29 [600, 256] --
63
+ │ │ └─Linear: 3-30 [600, 256] 65,792
64
+ │ │ └─GELU: 3-31 [600, 256] --
65
+ │ │ └─Dropout: 3-32 [600, 256] --
66
+ │ │ └─Linear: 3-33 [600, 1] 257
67
+ ├─MLP: 1-16 [600, 1] --
68
+ │ └─Sequential: 2-15 [600, 1] --
69
+ │ │ └─Linear: 3-34 [600, 256] 131,328
70
+ │ │ └─GELU: 3-35 [600, 256] --
71
+ │ │ └─Dropout: 3-36 [600, 256] --
72
+ │ │ └─Linear: 3-37 [600, 256] 65,792
73
+ │ │ └─GELU: 3-38 [600, 256] --
74
+ │ │ └─Dropout: 3-39 [600, 256] --
75
+ │ │ └─Linear: 3-40 [600, 1] 257
76
+ ===============================================================================================
77
+ Total params: 16,126,211
78
+ Trainable params: 16,126,211
79
+ Non-trainable params: 0
80
+ Total mult-adds (Units.GIGABYTES): 70.54
81
+ ===============================================================================================
82
+ Input size (MB): 28.96
83
+ Forward/backward pass size (MB): 118787.71
84
+ Params size (MB): 48.71
85
+ Estimated Total Size (MB): 118865.38
86
+ ===============================================================================================
modeling_hawkes.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from fim.models.hawkes import FIMHawkes
2
+
3
+
4
+ __all__ = ["FIMHawkes"]
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c01bd55e9770f9df86a038c27b4997314f68a1b9abc565f8b983e171bbaf7fb6
3
+ size 64568539
train_parameters.yaml ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset:
2
+ dataset_kwargs:
3
+ field_name_for_dimension_grouping: base_intensity_functions
4
+ files_to_load:
5
+ base_intensity_functions: base_intensity_functions.pt
6
+ event_times: event_times.pt
7
+ event_types: event_types.pt
8
+ kernel_functions: kernel_functions.pt
9
+ time_offsets: time_offsets.pt
10
+ shuffle: true
11
+ loader_kwargs:
12
+ batch_size: 6
13
+ full_len_ratio: 0.1
14
+ max_number_of_minibatch_sizes: 8
15
+ max_path_count: 2000
16
+ max_sequence_len: 100
17
+ min_path_count: 400
18
+ min_sequence_len: 15
19
+ num_inference_paths: 1
20
+ num_inference_times: 2000
21
+ num_workers: 16
22
+ test_batch_size: 2
23
+ variable_num_of_paths: true
24
+ variable_sequence_lens:
25
+ train: true
26
+ validation: false
27
+ name: HawkesDataLoader
28
+ path:
29
+ train: !!python/tuple
30
+ - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_exp_kernel/train
31
+ - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_exp_kernel_no_interactions/train
32
+ - data/synthetic_data/hawkes/1k_1D_2k_paths_Gamma_base_exp_kernel/train
33
+ - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_exp_kernel/train
34
+ - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_exp_kernel_no_interactions/train
35
+ - data/synthetic_data/hawkes/1k_5D_2k_paths_Gamma_base_exp_kernel/train
36
+ - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel/train
37
+ - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel_no_interactions/train
38
+ - data/synthetic_data/hawkes/1k_10D_2k_paths_poisson/train
39
+ - data/synthetic_data/hawkes/1k_10D_2k_paths_Gamma_base_exp_kernel/train
40
+ - data/synthetic_data/hawkes/1k_10D_2k_paths_sin_base_exp_kernel/train
41
+ - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel/train
42
+ - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel_no_interactions/train
43
+ - data/synthetic_data/hawkes/1k_15D_2k_paths_poisson/train
44
+ - data/synthetic_data/hawkes/1k_15D_2k_paths_Gamma_base_exp_kernel/train
45
+ - data/synthetic_data/hawkes/1k_15D_2k_paths_sin_base_exp_kernel/train
46
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel/train
47
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_no_interactions/train
48
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_poisson/train
49
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_Gamma_base_exp_kernel/train
50
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_sin_base_exp_kernel/train
51
+ - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel_sparse/train
52
+ - data/synthetic_data/hawkes/1k_10D_2k_paths_Gamma_base_exp_kernel_sparse/train
53
+ - data/synthetic_data/hawkes/1k_10D_2k_paths_sin_base_exp_kernel_sparse/train
54
+ - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel_sparse/train
55
+ - data/synthetic_data/hawkes/1k_15D_2k_paths_Gamma_base_exp_kernel_sparse/train
56
+ - data/synthetic_data/hawkes/1k_15D_2k_paths_sin_base_exp_kernel_sparse/train
57
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_sparse/train
58
+ - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_rayleigh_kernel/train
59
+ - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_rayleigh_kernel/train
60
+ - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_rayleigh_kernel/train
61
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel/train
62
+ - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_rayleigh_kernel_sparse/train
63
+ - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_rayleigh_kernel_sparse/train
64
+ - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_rayleigh_kernel_sparse/train
65
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel_sparse/train
66
+ validation: !!python/tuple
67
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel/val
68
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_no_interactions/val
69
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_poisson/val
70
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_Gamma_base_exp_kernel/val
71
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_sin_base_exp_kernel/val
72
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel/val
73
+ - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel_sparse/val
74
+ distributed:
75
+ activation_chekpoint: false
76
+ checkpoint_type: full_state
77
+ enabled: false
78
+ min_num_params: 1e5
79
+ sharding_strategy: NO_SHARD
80
+ wrap_policy: SIZE_BAZED
81
+ experiment:
82
+ device_map: auto
83
+ name: FIM_Hawkes_10-22st_nll_mc_only_2000_paths_mixed_100_events_mixed-experiment-seed-10-dataset-dataset_kwargs-field_name_for_dimension_grouping-base_intensity_functions
84
+ name_add_date: true
85
+ seed: 10
86
+ model:
87
+ alpha_decoder:
88
+ hidden_act:
89
+ name: torch.nn.GELU
90
+ hidden_layers: !!python/tuple
91
+ - 256
92
+ - 256
93
+ name: fim.models.blocks.base.MLP
94
+ beta_decoder:
95
+ hidden_act:
96
+ name: torch.nn.GELU
97
+ hidden_layers: !!python/tuple
98
+ - 256
99
+ - 256
100
+ name: fim.models.blocks.base.MLP
101
+ context_summary_encoder:
102
+ encoder_layer:
103
+ batch_first: true
104
+ dropout: 0.0
105
+ name: torch.nn.TransformerEncoderLayer
106
+ nhead: 4
107
+ name: torch.nn.TransformerEncoder
108
+ num_layers: 2
109
+ context_summary_pooling:
110
+ attention:
111
+ nhead: 4
112
+ name: fim.models.blocks.neural_operators.AttentionOperator
113
+ num_res_layers: 1
114
+ paths_block_attention: false
115
+ context_ts_encoder:
116
+ encoder_layer:
117
+ batch_first: true
118
+ dropout: 0.0
119
+ name: torch.nn.TransformerEncoderLayer
120
+ nhead: 4
121
+ name: torch.nn.TransformerEncoder
122
+ num_layers: 4
123
+ decoder_ts:
124
+ decoder_layer:
125
+ batch_first: true
126
+ dropout: 0.0
127
+ name: torch.nn.TransformerDecoderLayer
128
+ nhead: 4
129
+ name: torch.nn.TransformerDecoder
130
+ num_layers: 4
131
+ delta_time_encoder:
132
+ name: fim.models.blocks.positional_encodings.SineTimeEncoding
133
+ out_features: 256
134
+ evaluation_mark_encoder:
135
+ name: torch.nn.Linear
136
+ hidden_act:
137
+ name: torch.nn.GELU
138
+ hidden_dim: 256
139
+ loss_weights:
140
+ alpha: 0.0
141
+ mu: 0.0
142
+ nll: 1.0
143
+ relative_spike: 0.0
144
+ smape: 0.0
145
+ mark_encoder:
146
+ name: torch.nn.Linear
147
+ out_features: 256
148
+ mark_fusion_attention: null
149
+ max_num_marks: 22
150
+ model_type: fimhawkes
151
+ mu_decoder:
152
+ hidden_act:
153
+ name: torch.nn.GELU
154
+ hidden_layers: !!python/tuple
155
+ - 256
156
+ - 256
157
+ name: fim.models.blocks.base.MLP
158
+ nll:
159
+ method: monte_carlo
160
+ num_integration_points: 200
161
+ normalize_by_max_time: false
162
+ normalize_times: true
163
+ thinning: null
164
+ time_encoder:
165
+ name: fim.models.blocks.positional_encodings.SineTimeEncoding
166
+ out_features: 256
167
+ optimizers: !!python/tuple
168
+ - optimizer_d:
169
+ lr: 5.0e-05
170
+ name: torch.optim.AdamW
171
+ weight_decay: 0.0001
172
+ trainer:
173
+ best_metric: loss
174
+ debug_iterations: null
175
+ detect_anomaly: false
176
+ epochs: 100000
177
+ evaluation_epoch:
178
+ enable_plotting: false
179
+ inference_path_idx: 0
180
+ iterator_name: validation
181
+ path: fim.trainers.evaluation_epochs.HawkesEvaluationPlots
182
+ plot_frequency: 10
183
+ experiment_dir: ./results/
184
+ gradient_accumulation_steps: 6
185
+ logging_format: RANK_%(rank)s - %(asctime)s - %(name)s - %(levelname)s - %(message)s
186
+ name: Trainer
187
+ precision: bf16_mixed
188
+ save_every: 1
189
+ schedulers: !!python/tuple
190
+ - beta: 1.0
191
+ label: gauss_nll
192
+ name: fim.utils.param_scheduler.ConstantScheduler
193
+ - beta: 1.0
194
+ label: init_cross_entropy
195
+ name: fim.utils.param_scheduler.ConstantScheduler
196
+ - beta: 1.0
197
+ label: missing_link
198
+ name: fim.utils.param_scheduler.ConstantScheduler