Upload FIM-ODE base model

Browse files

Files changed (7) hide show

base_model/checkpoints/best-model/best-model.pth +3 -0
base_model/checkpoints/best-model/config.json +30 -0
base_model/checkpoints/best-model/model.safetensors +3 -0
base_model/checkpoints/best-model/optimizers-checkpoint.pth +3 -0
base_model/checkpoints/best-model/train-state-checkpoint.pth +3 -0
base_model/model_architecture.txt +29 -0
base_model/train_parameters.yaml +136 -0

base_model/checkpoints/best-model/best-model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad7078ae75a5c4417ec0da095a242cb6cdcd874f1c2dd9191efab9c124800fe4
+size 52002366

base_model/checkpoints/best-model/config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "_attn_implementation_autoset": true,
+  "model_config": {
+    "attention_map": "softmax",
+    "attention_method": "linear",
+    "dim_embed": 256,
+    "dim_feedforward": 1024,
+    "dim_ffn_u_model": 1024,
+    "dim_hidden_u_model": 256,
+    "dim_max_trajectory": 3,
+    "dropout": 0.1,
+    "num_context_encoder_layers": 2,
+    "num_heads": 8,
+    "num_res_layer_u_model": 6,
+    "num_res_layers_functional_decoder": 8,
+    "use_bias_for_projection": true,
+    "use_bias_in_attention": true,
+    "use_query_residual_in_attention": true
+  },
+  "train_config": {
+    "corruption_model_type": "odeformer",
+    "loss_filter_nans": true,
+    "loss_type": "l1",
+    "max_sigma_trajectory_noise": 0.06,
+    "max_subsampling_ration": 0.5,
+    "train_type": "vector_field",
+    "train_with_normalized_head": true
+  },
+  "transformers_version": "4.46.0"
+}

base_model/checkpoints/best-model/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5df648066cb57306c558f4faa89399103cd24279c477c8db7676b182171b2d36
+size 51907384

base_model/checkpoints/best-model/optimizers-checkpoint.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f94f664040a5bfecb084568f6a99c9f89e502b7a8ec2c7b161f13b6987e09d30
+size 19288

base_model/checkpoints/best-model/train-state-checkpoint.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:008b081ce417e75defa6ab6cde494d1c6e7351244dc39622671b2e7b0662333e
+size 643246

base_model/model_architecture.txt ADDED Viewed

	@@ -0,0 +1,29 @@

+==============================================================================================================
+Layer (type:depth-idx)                                       Output Shape              Param #
+==============================================================================================================
+TrainingWrapper                                              --                        --
+├─FimOdeon: 1-1                                              --                        --
+│    └─TrajectoryEncoder: 2-1                                --                        896
+│    │    └─TransformerEncoder: 3-1                          [1, 1194, 256]            1,579,520
+│    └─Sequential: 2-2                                       --                        --
+│    │    └─Linear: 3-2                                      [1, 2400, 256]            1,024
+│    │    └─ReLU: 3-3                                        [1, 2400, 256]            --
+│    │    └─Linear: 3-4                                      [1, 2400, 256]            65,792
+│    └─AttentionOperator: 2-3                                --                        --
+│    │    └─ModuleList: 3-5                                  --                        6,318,080
+│    │    └─MLP: 3-6                                         [1, 2400, 3]              132,355
+├─UncertaintyEstimator: 1-2                                  --                        --
+│    └─AttentionOperator: 2-4                                --                        --
+│    │    └─ModuleList: 3-7                                  --                        4,738,560
+│    │    └─MLP: 3-8                                         [1, 2400, 1]              131,841
+==============================================================================================================
+Total params: 12,968,068
+Trainable params: 12,968,068
+Non-trainable params: 0
+Total mult-adds (Units.MEGABYTES): 12.97
+==============================================================================================================
+Input size (MB): 0.09
+Forward/backward pass size (MB): 771.15
+Params size (MB): 51.87
+Estimated Total Size (MB): 823.11
+==============================================================================================================

base_model/train_parameters.yaml ADDED Viewed

	@@ -0,0 +1,136 @@

+dataset:  # data-loading settings; most options are given per split (test / train / validation)
+  add_dim_keys:  # the listed key also appears under files_to_load below
+    test: !!python/tuple  # python-specific tag (used throughout this file); safe YAML loaders reject it
+    - drift_at_observations
+    train: !!python/tuple
+    - drift_at_observations
+    validation: !!python/tuple
+    - drift_at_observations
+  add_paths_keys:
+    test: !!python/tuple
+    - drift_at_observations
+    train: !!python/tuple
+    - drift_at_observations
+    validation: !!python/tuple
+    - drift_at_observations
+  batch_size:
+    test: 32
+    train: 64
+    validation: 32
+  data_dirs:  # NOTE(review): absolute cluster paths; the test and validation dirs also sit under a processed/train/ parent — confirm intended
+    test: !!python/tuple
+    - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/test/test_deg_3
+    - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/test/test_deg_2
+    - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/test/test_deg_1
+    train: !!python/tuple
+    - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/train/train_deg_3
+    - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/train/train_deg_2
+    - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/train/train_deg_1
+    validation: !!python/tuple
+    - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/validation/val_deg_3
+    - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/validation/val_deg_2
+    - /lustre/mlnvme/data/s78mmaue_hpc-demo2/data_generation/data/123_600k_with_obs_drift/0/data/processed/train/30k_drift_deg_3_ablation_studies/degree_and_monomial_survival_uniform/validation/val_deg_1
+  dataset_name:
+    test: HeterogeneousFIMSDEDataset  # differs from the Streaming* class named for train and validation
+    train: StreamingFIMSDEDataset
+    validation: StreamingFIMSDEDataset
+  files_to_load:  # maps a field name to an .h5 file name
+    drift_at_locations: drift_at_locations.h5
+    drift_at_observations: drift_at_observations.h5
+    locations: locations.h5
+    obs_mask: obs_mask.h5
+    obs_times: obs_times.h5
+    obs_values: obs_values.h5
+  max_dim: 3
+  name: FIMSDEDataloaderIterableDataset
+  num_locations:
+    test: null  # no value set for the test split
+    train: 2000
+    validation: 10000
+  num_observations:
+    test: null  # no value set for the test split
+    train: !!python/tuple  # two-element tuple; presumably a (min, max) range — TODO confirm
+    - 0
+    - 1801
+    validation: !!python/tuple
+    - 1799
+    - 1801
+  num_workers:
+    test: 0
+    train: 7
+    validation: 5
+  shard:
+    test: false
+    train: true
+    validation: true
+  shuffle_elements: true
+  shuffle_locations:
+    test: false
+    train: true
+    validation: true
+  shuffle_paths: true
+distributed:  # settings for distributed training
+  activation_chekpoint: false  # NOTE(review): key is spelled "chekpoint"; presumably read under this exact spelling — confirm before renaming
+  checkpoint_type: full_state
+  enabled: true
+  min_num_params: 1e5  # NOTE(review): no decimal point or exponent sign, so YAML 1.1 loaders such as PyYAML read this as the string "1e5", not a number — confirm the consumer converts it
+  sharding_strategy: NO_SHARD
+  wrap_policy: SIZE_BAZED  # NOTE(review): spelled "SIZE_BAZED"; presumably matched verbatim by the consumer — confirm before correcting
+experiment:  # run name, random seed and target device
+  device_map: cuda
+  name: big_model_l1_600k_examples
+  name_add_date: true  # presumably appends the date to `name` — TODO confirm
+  seed: 10
+model:  # model_config and train_config carry the same values as the checkpoint's config.json in this commit
+  model_config:
+    attention_map: softmax
+    attention_method: linear
+    dim_embed: 256
+    dim_feedforward: 1024
+    dim_ffn_u_model: 1024
+    dim_hidden_u_model: 256
+    dim_max_trajectory: 3
+    dropout: 0.1
+    num_context_encoder_layers: 2
+    num_heads: 8
+    num_res_layer_u_model: 6
+    num_res_layers_functional_decoder: 8
+    use_bias_for_projection: true
+    use_bias_in_attention: true
+    use_query_residual_in_attention: true
+  model_type: TrainingWrapper  # same name as the root module listed in model_architecture.txt
+  train_config:
+    corruption_model_type: odeformer
+    loss_filter_nans: true
+    loss_type: l1
+    max_sigma_trajectory_noise: 0.06
+    max_subsampling_ration: 0.5  # NOTE(review): "ration" is likely "ratio"; config.json uses the same spelling, so do not rename it here alone
+    train_type: vector_field
+    train_with_normalized_head: true
+optimizers: !!python/tuple  # tuple with a single entry, keyed optimizer_d
+- optimizer_d:
+    gradient_norm_clipping: 10
+    lr: 1.0e-05
+    name: torch.optim.AdamW  # dotted path; presumably resolved to the optimizer class by the trainer — TODO confirm
+    weight_decay: 0.0001
+trainer:
+  best_metric: loss
+  debug_iterations: null
+  detect_anomaly: false
+  epochs: 2500
+  experiment_dir: ./results/
+  gradient_accumulation_steps: 1
+  logging_format: RANK_%(rank)s - %(asctime)s - %(name)s - %(levelname)s - %(message)s
+  name: Trainer
+  precision: bf16mixed
+  save_every: 1
+  schedulers: !!python/tuple
+  - beta: 1.0
+    label: drift_loss_scale
+    name: fim.utils.param_scheduler.ConstantScheduler