# FIM-PP / train_parameters.yaml
# david-berghaus-fh's picture
# Upload folder using huggingface_hub
# 9b43be1
# Data-loading configuration: loader name, tensor files to read, batching and
# sub-sampling limits, and the train/validation dataset directories.
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped (keys are alphabetically sorted per mapping) — verify against the
# original file.
# The `!!python/tuple` tag is PyYAML-specific; safe loaders reject it.
dataset:
  dataset_kwargs:
    field_name_for_dimension_grouping: base_intensity_functions
    # Logical field name -> file name inside each dataset directory.
    files_to_load:
      base_intensity_functions: base_intensity_functions.pt
      event_times: event_times.pt
      event_types: event_types.pt
      kernel_functions: kernel_functions.pt
      time_offsets: time_offsets.pt
    shuffle: true
  loader_kwargs:
    batch_size: 6
    full_len_ratio: 0.1
    max_number_of_minibatch_sizes: 8
    max_path_count: 2000
    max_sequence_len: 100
    min_path_count: 400
    min_sequence_len: 15
    num_inference_paths: 1
    num_inference_times: 2000
    num_workers: 16
    test_batch_size: 2
    variable_num_of_paths: true
    # Per-split switch; enabled for training only.
    variable_sequence_lens:
      train: true
      validation: false
  name: HawkesDataLoader
  path:
    train: !!python/tuple
    - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_exp_kernel/train
    - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_exp_kernel_no_interactions/train
    - data/synthetic_data/hawkes/1k_1D_2k_paths_Gamma_base_exp_kernel/train
    - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_exp_kernel/train
    - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_exp_kernel_no_interactions/train
    - data/synthetic_data/hawkes/1k_5D_2k_paths_Gamma_base_exp_kernel/train
    - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel/train
    - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel_no_interactions/train
    - data/synthetic_data/hawkes/1k_10D_2k_paths_poisson/train
    - data/synthetic_data/hawkes/1k_10D_2k_paths_Gamma_base_exp_kernel/train
    - data/synthetic_data/hawkes/1k_10D_2k_paths_sin_base_exp_kernel/train
    - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel/train
    - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel_no_interactions/train
    - data/synthetic_data/hawkes/1k_15D_2k_paths_poisson/train
    - data/synthetic_data/hawkes/1k_15D_2k_paths_Gamma_base_exp_kernel/train
    - data/synthetic_data/hawkes/1k_15D_2k_paths_sin_base_exp_kernel/train
    - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel/train
    - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_no_interactions/train
    - data/synthetic_data/hawkes/5k_22D_2k_paths_poisson/train
    - data/synthetic_data/hawkes/5k_22D_2k_paths_Gamma_base_exp_kernel/train
    - data/synthetic_data/hawkes/5k_22D_2k_paths_sin_base_exp_kernel/train
    - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_exp_kernel_sparse/train
    - data/synthetic_data/hawkes/1k_10D_2k_paths_Gamma_base_exp_kernel_sparse/train
    - data/synthetic_data/hawkes/1k_10D_2k_paths_sin_base_exp_kernel_sparse/train
    - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_exp_kernel_sparse/train
    - data/synthetic_data/hawkes/1k_15D_2k_paths_Gamma_base_exp_kernel_sparse/train
    - data/synthetic_data/hawkes/1k_15D_2k_paths_sin_base_exp_kernel_sparse/train
    - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_sparse/train
    - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_rayleigh_kernel/train
    - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_rayleigh_kernel/train
    - data/synthetic_data/hawkes/1k_15D_2k_paths_const_base_rayleigh_kernel/train
    - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel/train
    - data/synthetic_data/hawkes/1k_1D_2k_paths_const_base_rayleigh_kernel_sparse/train
    - data/synthetic_data/hawkes/1k_5D_2k_paths_const_base_rayleigh_kernel_sparse/train
    - data/synthetic_data/hawkes/1k_10D_2k_paths_const_base_rayleigh_kernel_sparse/train
    - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel_sparse/train
    # Validation uses only the `val` splits of the 5k_22D datasets.
    validation: !!python/tuple
    - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel/val
    - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_exp_kernel_no_interactions/val
    - data/synthetic_data/hawkes/5k_22D_2k_paths_poisson/val
    - data/synthetic_data/hawkes/5k_22D_2k_paths_Gamma_base_exp_kernel/val
    - data/synthetic_data/hawkes/5k_22D_2k_paths_sin_base_exp_kernel/val
    - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel/val
    - data/synthetic_data/hawkes/5k_22D_2k_paths_const_base_rayleigh_kernel_sparse/val
# Distributed-training settings; disabled in this run (`enabled: false`).
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped — verify against the original file.
distributed:
  # NOTE(review): key is spelled `activation_chekpoint`; kept as-is because the
  # consuming code presumably looks it up under this exact name — confirm
  # before renaming.
  activation_chekpoint: false
  checkpoint_type: full_state
  enabled: false
  # NOTE(review): `1e5` has no decimal point, so YAML 1.1 parsers such as
  # PyYAML load it as the string "1e5" while YAML 1.2 parsers load a float.
  # Left unchanged — confirm which type the consumer expects.
  min_num_params: 1e5
  sharding_strategy: NO_SHARD
  # NOTE(review): value is spelled `SIZE_BAZED`; presumably matched verbatim by
  # the consumer — confirm before correcting.
  wrap_policy: SIZE_BAZED
# Experiment identity: device mapping, run name, and random seed.
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped — verify against the original file.
experiment:
  device_map: auto
  name: FIM_Hawkes_10-22st_nll_mc_only_2000_paths_mixed_100_events_mixed-experiment-seed-10-dataset-dataset_kwargs-field_name_for_dimension_grouping-base_intensity_functions
  name_add_date: true
  seed: 10
# Model architecture for `model_type: fimhawkes`. Each sub-mapping with a
# `name` key gives a dotted class path plus that component's arguments.
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped, using the per-mapping alphabetical key order — verify against the
# original file.
# The `!!python/tuple` tag is PyYAML-specific; safe loaders reject it.
model:
  alpha_decoder:
    hidden_act:
      name: torch.nn.GELU
    hidden_layers: !!python/tuple
    - 256
    - 256
    name: fim.models.blocks.base.MLP
  beta_decoder:
    hidden_act:
      name: torch.nn.GELU
    hidden_layers: !!python/tuple
    - 256
    - 256
    name: fim.models.blocks.base.MLP
  context_summary_encoder:
    encoder_layer:
      batch_first: true
      dropout: 0.0
      name: torch.nn.TransformerEncoderLayer
      nhead: 4
    name: torch.nn.TransformerEncoder
    num_layers: 2
  context_summary_pooling:
    attention:
      nhead: 4
    name: fim.models.blocks.neural_operators.AttentionOperator
    num_res_layers: 1
    paths_block_attention: false
  context_ts_encoder:
    encoder_layer:
      batch_first: true
      dropout: 0.0
      name: torch.nn.TransformerEncoderLayer
      nhead: 4
    name: torch.nn.TransformerEncoder
    num_layers: 4
  decoder_ts:
    decoder_layer:
      batch_first: true
      dropout: 0.0
      name: torch.nn.TransformerDecoderLayer
      nhead: 4
    name: torch.nn.TransformerDecoder
    num_layers: 4
  delta_time_encoder:
    name: fim.models.blocks.positional_encodings.SineTimeEncoding
    out_features: 256
  # Unlike `mark_encoder`, no `out_features` is given here.
  evaluation_mark_encoder:
    name: torch.nn.Linear
  # NOTE(review): `hidden_act` / `hidden_dim` are placed at model level because
  # they sort before `name` and so cannot belong to the mapping above — confirm.
  hidden_act:
    name: torch.nn.GELU
  hidden_dim: 256
  # Only the `nll` term has a non-zero weight in this run.
  loss_weights:
    alpha: 0.0
    mu: 0.0
    nll: 1.0
    relative_spike: 0.0
    smape: 0.0
  mark_encoder:
    name: torch.nn.Linear
    out_features: 256
  mark_fusion_attention: null
  max_num_marks: 22
  model_type: fimhawkes
  mu_decoder:
    hidden_act:
      name: torch.nn.GELU
    hidden_layers: !!python/tuple
    - 256
    - 256
    name: fim.models.blocks.base.MLP
  nll:
    method: monte_carlo
    num_integration_points: 200
  normalize_by_max_time: false
  normalize_times: true
  thinning: null
  time_encoder:
    name: fim.models.blocks.positional_encodings.SineTimeEncoding
    out_features: 256
# Optimizer definitions: a tuple with a single entry, `optimizer_d`.
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped — verify against the original file.
# The `!!python/tuple` tag is PyYAML-specific; safe loaders reject it.
optimizers: !!python/tuple
- optimizer_d:
    lr: 5.0e-05
    name: torch.optim.AdamW
    weight_decay: 0.0001
# Training-loop settings: run length, evaluation hook, output directory,
# precision, checkpoint cadence, and parameter schedulers.
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped — verify against the original file.
# The `!!python/tuple` tag is PyYAML-specific; safe loaders reject it.
trainer:
  best_metric: loss
  debug_iterations: null
  detect_anomaly: false
  epochs: 100000
  evaluation_epoch:
    enable_plotting: false
    inference_path_idx: 0
    iterator_name: validation
    path: fim.trainers.evaluation_epochs.HawkesEvaluationPlots
    plot_frequency: 10
  experiment_dir: ./results/
  gradient_accumulation_steps: 6
  logging_format: RANK_%(rank)s - %(asctime)s - %(name)s - %(levelname)s - %(message)s
  name: Trainer
  precision: bf16_mixed
  save_every: 1
  # Three constant schedulers, each with beta 1.0, identified by `label`.
  schedulers: !!python/tuple
  - beta: 1.0
    label: gauss_nll
    name: fim.utils.param_scheduler.ConstantScheduler
  - beta: 1.0
    label: init_cross_entropy
    name: fim.utils.param_scheduler.ConstantScheduler
  - beta: 1.0
    label: missing_link
    name: fim.utils.param_scheduler.ConstantScheduler