weatherforecast1024 committed on Jun 27, 2025

Commit

f3b050a

verified ·

1 Parent(s): ee6b828

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +1 -0
FusionModel.egg-info/PKG-INFO +8 -0
FusionModel.egg-info/SOURCES.txt +11 -0
FusionModel.egg-info/dependency_links.txt +1 -0
FusionModel.egg-info/top_level.txt +2 -0
checkpoint/Unet/checkpoints/epoch_003.ckpt +3 -0
checkpoint/Unet/checkpoints/last.ckpt +3 -0
checkpoint/Unet/csv_logs/version_0/hparams.yaml +24 -0
checkpoint/Unet/csv_logs/version_0/metrics.csv +0 -0
checkpoint/Unet/wandb_logs/config.yaml +157 -0
checkpoint/Unet/wandb_logs/wandb/debug-internal.log +7 -0
checkpoint/Unet/wandb_logs/wandb/debug.log +22 -0
checkpoint/Unet/wandb_logs/wandb/run-20250617_085744-m5tg7yyl/files/output.log +0 -0
checkpoint/Unet/wandb_logs/wandb/run-20250617_085744-m5tg7yyl/files/wandb-summary.json +1 -0
checkpoint/Unet/wandb_logs/wandb/run-20250617_085744-m5tg7yyl/logs/debug-core.log +13 -0
checkpoint/Unet/wandb_logs/wandb/run-20250617_085744-m5tg7yyl/logs/debug-internal.log +17 -0
checkpoint/Unet/wandb_logs/wandb/run-20250617_085744-m5tg7yyl/logs/debug.log +15 -0
checkpoint/Unet/wandb_logs/wandb/run-20250617_085744-m5tg7yyl/run-m5tg7yyl.wandb +0 -0
checkpoint/Unet/wandb_logs/wandb/run-20250617_090527-0nx0l2dh/files/output.log +161 -0
checkpoint/Unet/wandb_logs/wandb/run-20250617_090527-0nx0l2dh/files/requirements.txt +77 -0
checkpoint/Unet/wandb_logs/wandb/run-20250617_090527-0nx0l2dh/files/wandb-metadata.json +85 -0
checkpoint/Unet/wandb_logs/wandb/run-20250617_090527-0nx0l2dh/logs/debug-core.log +7 -0
checkpoint/Unet/wandb_logs/wandb/run-20250617_090527-0nx0l2dh/logs/debug-internal.log +7 -0
checkpoint/Unet/wandb_logs/wandb/run-20250617_090527-0nx0l2dh/logs/debug.log +22 -0
checkpoint/Unet/wandb_logs/wandb/run-20250617_090527-0nx0l2dh/run-0nx0l2dh.wandb +3 -0
configs/AttR2Unet.yaml +86 -0
configs/AttUnet.yaml +86 -0
configs/Nothing.yaml +86 -0
configs/R2Unet.yaml +95 -0
configs/Unet.yaml +104 -0
pyproject.toml +21 -0
src/__pycache__/arch.cpython-310.pyc +0 -0
src/__pycache__/arch.cpython-312.pyc +0 -0
src/__pycache__/arch.cpython-38.pyc +0 -0
src/__pycache__/datamodule.cpython-310.pyc +0 -0
src/__pycache__/datamodule.cpython-312.pyc +0 -0
src/__pycache__/lr_scheduler.cpython-310.pyc +0 -0
src/__pycache__/lr_scheduler.cpython-312.pyc +0 -0
src/__pycache__/metric.cpython-310.pyc +0 -0
src/__pycache__/metric.cpython-312.pyc +0 -0
src/__pycache__/module.cpython-310.pyc +0 -0
src/__pycache__/module.cpython-312.pyc +0 -0
src/__pycache__/module.cpython-38.pyc +0 -0
src/__pycache__/train.cpython-38.pyc +0 -0
src/arch.py +473 -0
src/datamodule.py +341 -0
src/lr_scheduler.py +94 -0
src/metric.py +44 -0
src/module.py +168 -0
src/rad_clim.py +23 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+checkpoint/Unet/wandb_logs/wandb/run-20250617_090527-0nx0l2dh/run-0nx0l2dh.wandb filter=lfs diff=lfs merge=lfs -text

FusionModel.egg-info/PKG-INFO ADDED Viewed

	@@ -0,0 +1,8 @@

+Metadata-Version: 2.4
+Name: FusionModel
+Version: 0.3.1
+Author-email: Khanh Vinh Bui <khanhvinhbui0512@gmail.com>, Hong Trang Le <lhtrang@hcmut.edu.vn>
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown

FusionModel.egg-info/SOURCES.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+pyproject.toml
+FusionModel.egg-info/PKG-INFO
+FusionModel.egg-info/SOURCES.txt
+FusionModel.egg-info/dependency_links.txt
+FusionModel.egg-info/top_level.txt
+src/arch.py
+src/datamodule.py
+src/lr_scheduler.py
+src/metric.py
+src/module.py
+src/train.py

FusionModel.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+

FusionModel.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ configs
2	+ src

checkpoint/Unet/checkpoints/epoch_003.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9f005cf7c67d6259fdc39a5ccb425db8367dc96622457009fcb82a9df5123487
+size 521087

checkpoint/Unet/checkpoints/last.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9f005cf7c67d6259fdc39a5ccb425db8367dc96622457009fcb82a9df5123487
+size 521087

checkpoint/Unet/csv_logs/version_0/hparams.yaml ADDED Viewed

	@@ -0,0 +1,24 @@

+_instantiator: pytorch_lightning.cli.instantiate_module
+ablation: 'no'
+batch_size: 1
+beta_1: 0.9
+beta_2: 0.99
+dir_data: /data/weather2025/NhaBe/
+eta_min: 1.0e-08
+hours_predicted: 3
+lr: 0.0005
+max_epochs: 50
+num_workers: 4
+pin_memory: false
+pretrained_path: ''
+rad_inp_vars: precipitation
+rad_out_vars: precipitation
+rad_size: 400
+sat_inp_vars: total_precipitation
+sat_out_vars: total_precipitation
+sat_size: 25
+time_points_rad: 1
+time_points_sat: 1
+warmup_epochs: 10
+warmup_start_lr: 1.0e-08
+weight_decay: 1.0e-05

checkpoint/Unet/csv_logs/version_0/metrics.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint/Unet/wandb_logs/config.yaml ADDED Viewed

	@@ -0,0 +1,157 @@

+# pytorch_lightning==2.5.1.post0
+seed_everything: 42
+trainer:
+  accelerator: cuda
+  strategy: auto
+  devices:
+  - 6
+  num_nodes: 1
+  precision: 16-mixed
+  logger:
+  - class_path: pytorch_lightning.loggers.WandbLogger
+    init_args:
+      name: UnetNhaBe
+      save_dir: checkpoint/Unet/wandb_logs
+      version: null
+      offline: false
+      dir: null
+      id: null
+      anonymous: null
+      project: NhaBe
+      log_model: false
+      experiment: null
+      prefix: ''
+      checkpoint_name: null
+      entity: null
+      notes: null
+      tags: null
+      config: null
+      config_exclude_keys: null
+      config_include_keys: null
+      allow_val_change: null
+      group: null
+      job_type: null
+      mode: null
+      force: null
+      reinit: null
+      resume: null
+      resume_from: null
+      fork_from: null
+      save_code: null
+      tensorboard: null
+      sync_tensorboard: null
+      monitor_gym: null
+      settings: null
+  - class_path: pytorch_lightning.loggers.CSVLogger
+    init_args:
+      save_dir: checkpoint/Unet/csv_logs
+      name: null
+      version: null
+      prefix: ''
+      flush_logs_every_n_steps: 100
+  callbacks:
+  - class_path: pytorch_lightning.callbacks.LearningRateMonitor
+    init_args:
+      logging_interval: step
+      log_momentum: false
+      log_weight_decay: false
+  - class_path: pytorch_lightning.callbacks.ModelCheckpoint
+    init_args:
+      dirpath: checkpoint/Unet/checkpoints
+      filename: epoch_{epoch:03d}
+      monitor: val/mse
+      verbose: false
+      save_last: true
+      save_top_k: 1
+      save_weights_only: false
+      mode: min
+      auto_insert_metric_name: false
+      every_n_train_steps: null
+      train_time_interval: null
+      every_n_epochs: null
+      save_on_train_epoch_end: null
+      enable_version_counter: true
+  - class_path: pytorch_lightning.callbacks.EarlyStopping
+    init_args:
+      monitor: val/mse
+      min_delta: 0.0
+      patience: 10
+      verbose: false
+      mode: min
+      strict: true
+      check_finite: true
+      stopping_threshold: null
+      divergence_threshold: null
+      check_on_train_epoch_end: null
+      log_rank_zero_only: false
+  - class_path: pytorch_lightning.callbacks.RichModelSummary
+    init_args:
+      max_depth: -1
+  fast_dev_run: false
+  max_epochs: 100
+  min_epochs: 1
+  max_steps: -1
+  min_steps: null
+  max_time: null
+  limit_train_batches: null
+  limit_val_batches: null
+  limit_test_batches: null
+  limit_predict_batches: null
+  overfit_batches: 0.0
+  val_check_interval: null
+  check_val_every_n_epoch: 1
+  num_sanity_val_steps: null
+  log_every_n_steps: null
+  enable_checkpointing: true
+  enable_progress_bar: true
+  enable_model_summary: null
+  accumulate_grad_batches: 1
+  gradient_clip_val: null
+  gradient_clip_algorithm: null
+  deterministic: null
+  benchmark: null
+  inference_mode: true
+  use_distributed_sampler: true
+  profiler: null
+  detect_anomaly: false
+  barebones: false
+  plugins: null
+  sync_batchnorm: true
+  reload_dataloaders_every_n_epochs: 0
+  default_root_dir: checkpoint/Unet
+  model_registry: null
+model:
+  net:
+    class_path: arch.Network
+    init_args:
+      model_type: Unet
+      rad_channel: 1
+      sat_channel: 1
+      rad_size: 400
+      sat_size: 25
+  pretrained_path: ''
+  lr: 0.0005
+  beta_1: 0.9
+  beta_2: 0.99
+  weight_decay: 1.0e-05
+  warmup_epochs: 10
+  max_epochs: 50
+  warmup_start_lr: 1.0e-08
+  eta_min: 1.0e-08
+data:
+  dir_data: /data/weather2025/NhaBe/
+  batch_size: 1
+  hours_predicted: 3
+  num_workers: 4
+  pin_memory: false
+  time_points_rad: 1
+  time_points_sat: 1
+  sat_inp_vars: total_precipitation
+  sat_out_vars: total_precipitation
+  sat_size: 25
+  rad_inp_vars: precipitation
+  rad_out_vars: precipitation
+  rad_size: 400
+  ablation: 'no'
+optimizer: null
+lr_scheduler: null

checkpoint/Unet/wandb_logs/wandb/debug-internal.log ADDED Viewed

	@@ -0,0 +1,7 @@

+{"time":"2025-06-17T09:05:28.179242652Z","level":"INFO","msg":"stream: starting","core version":"0.20.1","symlink path":"checkpoint/Unet/wandb_logs/wandb/run-20250617_090527-0nx0l2dh/logs/debug-core.log"}
+{"time":"2025-06-17T09:05:29.423278937Z","level":"INFO","msg":"stream: created new stream","id":"0nx0l2dh"}
+{"time":"2025-06-17T09:05:29.423321777Z","level":"INFO","msg":"stream: started","id":"0nx0l2dh"}
+{"time":"2025-06-17T09:05:29.423393558Z","level":"INFO","msg":"sender: started","stream_id":"0nx0l2dh"}
+{"time":"2025-06-17T09:05:29.423393088Z","level":"INFO","msg":"writer: Do: started","stream_id":"0nx0l2dh"}
+{"time":"2025-06-17T09:05:29.423465179Z","level":"INFO","msg":"handler: started","stream_id":"0nx0l2dh"}
+{"time":"2025-06-17T09:05:30.100696875Z","level":"INFO","msg":"Starting system monitor"}

checkpoint/Unet/wandb_logs/wandb/debug.log ADDED Viewed

	@@ -0,0 +1,22 @@

+2025-06-17 09:05:27,961 INFO    MainThread:1311468 [wandb_setup.py:_flush():81] Current SDK version is 0.20.1
+2025-06-17 09:05:27,961 INFO    MainThread:1311468 [wandb_setup.py:_flush():81] Configure stats pid to 1311468
+2025-06-17 09:05:27,961 INFO    MainThread:1311468 [wandb_setup.py:_flush():81] Loading settings from /home/radaric/.config/wandb/settings
+2025-06-17 09:05:27,961 INFO    MainThread:1311468 [wandb_setup.py:_flush():81] Loading settings from /home/radaric/weather_forecast/Unet/wandb/settings
+2025-06-17 09:05:27,961 INFO    MainThread:1311468 [wandb_setup.py:_flush():81] Loading settings from environment variables
+2025-06-17 09:05:27,961 INFO    MainThread:1311468 [wandb_init.py:setup_run_log_directory():703] Logging user logs to checkpoint/Unet/wandb_logs/wandb/run-20250617_090527-0nx0l2dh/logs/debug.log
+2025-06-17 09:05:27,961 INFO    MainThread:1311468 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to checkpoint/Unet/wandb_logs/wandb/run-20250617_090527-0nx0l2dh/logs/debug-internal.log
+2025-06-17 09:05:27,961 INFO    MainThread:1311468 [wandb_init.py:init():831] calling init triggers
+2025-06-17 09:05:27,961 INFO    MainThread:1311468 [wandb_init.py:init():836] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-06-17 09:05:27,961 INFO    MainThread:1311468 [wandb_init.py:init():872] starting backend
+2025-06-17 09:05:28,169 INFO    MainThread:1311468 [wandb_init.py:init():875] sending inform_init request
+2025-06-17 09:05:28,174 INFO    MainThread:1311468 [wandb_init.py:init():883] backend started and connected
+2025-06-17 09:05:28,175 INFO    MainThread:1311468 [wandb_init.py:init():956] updated telemetry
+2025-06-17 09:05:28,175 INFO    MainThread:1311468 [wandb_init.py:init():980] communicating run to backend with 90.0 second timeout
+2025-06-17 09:05:30,098 INFO    MainThread:1311468 [wandb_init.py:init():1032] starting run threads in backend
+2025-06-17 09:05:30,175 INFO    MainThread:1311468 [wandb_run.py:_console_start():2453] atexit reg
+2025-06-17 09:05:30,175 INFO    MainThread:1311468 [wandb_run.py:_redirect():2301] redirect: wrap_raw
+2025-06-17 09:05:30,175 INFO    MainThread:1311468 [wandb_run.py:_redirect():2370] Wrapping output streams.
+2025-06-17 09:05:30,175 INFO    MainThread:1311468 [wandb_run.py:_redirect():2393] Redirects installed.
+2025-06-17 09:05:30,177 INFO    MainThread:1311468 [wandb_init.py:init():1078] run started, returning control to user process
+2025-06-17 09:05:31,151 INFO    MainThread:1311468 [wandb_run.py:_config_callback():1358] config_cb None None {'pretrained_path': '', 'lr': 0.0005, 'beta_1': 0.9, 'beta_2': 0.99, 'weight_decay': 1e-05, 'warmup_epochs': 10, 'max_epochs': 50, 'warmup_start_lr': 1e-08, 'eta_min': 1e-08, '_instantiator': 'pytorch_lightning.cli.instantiate_module', 'dir_data': '/data/weather2025/NhaBe/', 'batch_size': 1, 'hours_predicted': 3, 'num_workers': 4, 'pin_memory': False, 'time_points_rad': 1, 'time_points_sat': 1, 'sat_inp_vars': 'total_precipitation', 'sat_out_vars': 'total_precipitation', 'sat_size': 25, 'rad_inp_vars': 'precipitation', 'rad_out_vars': 'precipitation', 'rad_size': 400, 'ablation': 'no'}

checkpoint/Unet/wandb_logs/wandb/run-20250617_085744-m5tg7yyl/files/output.log ADDED Viewed

File without changes

checkpoint/Unet/wandb_logs/wandb/run-20250617_085744-m5tg7yyl/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"_wandb":{"runtime":0}}

checkpoint/Unet/wandb_logs/wandb/run-20250617_085744-m5tg7yyl/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,13 @@

+{"time":"2025-06-17T08:57:44.288260722Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpbpuchipv/port-1289333.txt","pid":1289333,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-06-17T08:57:44.289762517Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":1289333}
+{"time":"2025-06-17T08:57:44.289701246Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":43767,"Zone":""}}
+{"time":"2025-06-17T08:57:44.468360629Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:33480"}
+{"time":"2025-06-17T08:57:44.478126346Z","level":"INFO","msg":"handleInformInit: received","streamId":"m5tg7yyl","id":"127.0.0.1:33480"}
+{"time":"2025-06-17T08:57:45.013693012Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"m5tg7yyl","id":"127.0.0.1:33480"}
+{"time":"2025-06-17T08:57:46.227115814Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"127.0.0.1:33480"}
+{"time":"2025-06-17T08:57:46.227331796Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-06-17T08:57:46.227313783Z","level":"INFO","msg":"connection: closing","id":"127.0.0.1:33480"}
+{"time":"2025-06-17T08:57:46.227453186Z","level":"INFO","msg":"connection: closed successfully","id":"127.0.0.1:33480"}
+{"time":"2025-06-17T08:57:46.48785785Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"127.0.0.1:33480"}
+{"time":"2025-06-17T08:57:46.487909579Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"127.0.0.1:33480"}
+{"time":"2025-06-17T08:57:46.487925552Z","level":"INFO","msg":"server is closed"}

checkpoint/Unet/wandb_logs/wandb/run-20250617_085744-m5tg7yyl/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,17 @@

+{"time":"2025-06-17T08:57:44.478779812Z","level":"INFO","msg":"stream: starting","core version":"0.20.1","symlink path":"checkpoint/Unet/wandb_logs/wandb/run-20250617_085744-m5tg7yyl/logs/debug-core.log"}
+{"time":"2025-06-17T08:57:45.013625369Z","level":"INFO","msg":"stream: created new stream","id":"m5tg7yyl"}
+{"time":"2025-06-17T08:57:45.013682966Z","level":"INFO","msg":"stream: started","id":"m5tg7yyl"}
+{"time":"2025-06-17T08:57:45.013709365Z","level":"INFO","msg":"handler: started","stream_id":"m5tg7yyl"}
+{"time":"2025-06-17T08:57:45.013744481Z","level":"INFO","msg":"sender: started","stream_id":"m5tg7yyl"}
+{"time":"2025-06-17T08:57:45.013733645Z","level":"INFO","msg":"writer: Do: started","stream_id":"m5tg7yyl"}
+{"time":"2025-06-17T08:57:45.224225022Z","level":"ERROR","msg":"HTTP error","status":403,"method":"POST","url":"https://api.wandb.ai/graphql"}
+{"time":"2025-06-17T08:57:45.22437671Z","level":"ERROR","msg":"runupserter: failed to init run","error":"returned error 403: {\"data\":{\"upsertBucket\":null},\"errors\":[{\"message\":\"permission denied\",\"path\":[\"upsertBucket\"],\"extensions\":{\"code\":\"PERMISSION_ERROR\"}}]}"}
+{"time":"2025-06-17T08:57:46.227328327Z","level":"INFO","msg":"stream: closing","id":"m5tg7yyl"}
+{"time":"2025-06-17T08:57:46.227825345Z","level":"ERROR","msg":"sender: uploadConfigFile: stream: no run"}
+{"time":"2025-06-17T08:57:46.486865753Z","level":"ERROR","msg":"HTTP error","status":404,"method":"POST","url":"https://api.wandb.ai/graphql"}
+{"time":"2025-06-17T08:57:46.486986554Z","level":"ERROR","msg":"runfiles: CreateRunFiles returned error: returned error 404: {\"data\":{\"createRunFiles\":null},\"errors\":[{\"message\":\"project vinh-bui0512-hcmut/NhaBe not found during createRunFiles\",\"path\":[\"createRunFiles\"]}]}"}
+{"time":"2025-06-17T08:57:46.487641258Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-06-17T08:57:46.487699694Z","level":"INFO","msg":"handler: closed","stream_id":"m5tg7yyl"}
+{"time":"2025-06-17T08:57:46.487714658Z","level":"INFO","msg":"writer: Close: closed","stream_id":"m5tg7yyl"}
+{"time":"2025-06-17T08:57:46.487745625Z","level":"INFO","msg":"sender: closed","stream_id":"m5tg7yyl"}
+{"time":"2025-06-17T08:57:46.487775923Z","level":"INFO","msg":"stream: closed","id":"m5tg7yyl"}

checkpoint/Unet/wandb_logs/wandb/run-20250617_085744-m5tg7yyl/logs/debug.log ADDED Viewed

	@@ -0,0 +1,15 @@

+2025-06-17 08:57:44,256 INFO    MainThread:1289333 [wandb_setup.py:_flush():81] Current SDK version is 0.20.1
+2025-06-17 08:57:44,256 INFO    MainThread:1289333 [wandb_setup.py:_flush():81] Configure stats pid to 1289333
+2025-06-17 08:57:44,256 INFO    MainThread:1289333 [wandb_setup.py:_flush():81] Loading settings from /home/radaric/.config/wandb/settings
+2025-06-17 08:57:44,256 INFO    MainThread:1289333 [wandb_setup.py:_flush():81] Loading settings from /home/radaric/weather_forecast/Unet/wandb/settings
+2025-06-17 08:57:44,256 INFO    MainThread:1289333 [wandb_setup.py:_flush():81] Loading settings from environment variables
+2025-06-17 08:57:44,256 INFO    MainThread:1289333 [wandb_init.py:setup_run_log_directory():703] Logging user logs to checkpoint/Unet/wandb_logs/wandb/run-20250617_085744-m5tg7yyl/logs/debug.log
+2025-06-17 08:57:44,256 INFO    MainThread:1289333 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to checkpoint/Unet/wandb_logs/wandb/run-20250617_085744-m5tg7yyl/logs/debug-internal.log
+2025-06-17 08:57:44,256 INFO    MainThread:1289333 [wandb_init.py:init():831] calling init triggers
+2025-06-17 08:57:44,256 INFO    MainThread:1289333 [wandb_init.py:init():836] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-06-17 08:57:44,256 INFO    MainThread:1289333 [wandb_init.py:init():872] starting backend
+2025-06-17 08:57:44,468 INFO    MainThread:1289333 [wandb_init.py:init():875] sending inform_init request
+2025-06-17 08:57:44,473 INFO    MainThread:1289333 [wandb_init.py:init():883] backend started and connected
+2025-06-17 08:57:44,475 INFO    MainThread:1289333 [wandb_init.py:init():956] updated telemetry
+2025-06-17 08:57:44,476 INFO    MainThread:1289333 [wandb_init.py:init():980] communicating run to backend with 90.0 second timeout

checkpoint/Unet/wandb_logs/wandb/run-20250617_085744-m5tg7yyl/run-m5tg7yyl.wandb ADDED Viewed

Binary file (366 Bytes). View file

checkpoint/Unet/wandb_logs/wandb/run-20250617_090527-0nx0l2dh/files/output.log ADDED Viewed

	@@ -0,0 +1,161 @@

+Number of train samples: 31462
+Number of test samples: 8077
+Number of val samples: 1398
+LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6]
+┏━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
+┃[1;35m [0m[1;35m   [0m[1;35m [0m┃[1;35m [0m[1;35mName                           [0m[1;35m [0m┃[1;35m [0m[1;35mType       [0m[1;35m [0m┃[1;35m [0m[1;35mParams[0m[1;35m [0m┃[1;35m [0m[1;35mMode [0m[1;35m [0m┃
+┡━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
+│[2m [0m[2m0  [0m[2m [0m│ net                             │ Network     │ 30.0 K │ train │
+│[2m [0m[2m1  [0m[2m [0m│ net.net                         │ Unet        │ 30.0 K │ train │
+│[2m [0m[2m2  [0m[2m [0m│ net.net.encoder_blocks          │ ModuleList  │  4.8 K │ train │
+│[2m [0m[2m3  [0m[2m [0m│ net.net.encoder_blocks.0        │ ConvBlock   │     66 │ train │
+│[2m [0m[2m4  [0m[2m [0m│ net.net.encoder_blocks.0.conv   │ Sequential  │     66 │ train │
+│[2m [0m[2m5  [0m[2m [0m│ net.net.encoder_blocks.0.conv.0 │ Conv2d      │     20 │ train │
+│[2m [0m[2m6  [0m[2m [0m│ net.net.encoder_blocks.0.conv.1 │ BatchNorm2d │      4 │ train │
+│[2m [0m[2m7  [0m[2m [0m│ net.net.encoder_blocks.0.conv.2 │ ReLU        │      0 │ train │
+│[2m [0m[2m8  [0m[2m [0m│ net.net.encoder_blocks.0.conv.3 │ Conv2d      │     38 │ train │
+│[2m [0m[2m9  [0m[2m [0m│ net.net.encoder_blocks.0.conv.4 │ BatchNorm2d │      4 │ train │
+│[2m [0m[2m10 [0m[2m [0m│ net.net.encoder_blocks.0.conv.5 │ ReLU        │      0 │ train │
+│[2m [0m[2m11 [0m[2m [0m│ net.net.encoder_blocks.1        │ ConvBlock   │    240 │ train │
+│[2m [0m[2m12 [0m[2m [0m│ net.net.encoder_blocks.1.conv   │ Sequential  │    240 │ train │
+│[2m [0m[2m13 [0m[2m [0m│ net.net.encoder_blocks.1.conv.0 │ Conv2d      │     76 │ train │
+│[2m [0m[2m14 [0m[2m [0m│ net.net.encoder_blocks.1.conv.1 │ BatchNorm2d │      8 │ train │
+│[2m [0m[2m15 [0m[2m [0m│ net.net.encoder_blocks.1.conv.2 │ ReLU        │      0 │ train │
+│[2m [0m[2m16 [0m[2m [0m│ net.net.encoder_blocks.1.conv.3 │ Conv2d      │    148 │ train │
+│[2m [0m[2m17 [0m[2m [0m│ net.net.encoder_blocks.1.conv.4 │ BatchNorm2d │      8 │ train │
+│[2m [0m[2m18 [0m[2m [0m│ net.net.encoder_blocks.1.conv.5 │ ReLU        │      0 │ train │
+│[2m [0m[2m19 [0m[2m [0m│ net.net.encoder_blocks.2        │ ConvBlock   │    912 │ train │
+│[2m [0m[2m20 [0m[2m [0m│ net.net.encoder_blocks.2.conv   │ Sequential  │    912 │ train │
+│[2m [0m[2m21 [0m[2m [0m│ net.net.encoder_blocks.2.conv.0 │ Conv2d      │    296 │ train │
+│[2m [0m[2m22 [0m[2m [0m│ net.net.encoder_blocks.2.conv.1 │ BatchNorm2d │     16 │ train │
+│[2m [0m[2m23 [0m[2m [0m│ net.net.encoder_blocks.2.conv.2 │ ReLU        │      0 │ train │
+│[2m [0m[2m24 [0m[2m [0m│ net.net.encoder_blocks.2.conv.3 │ Conv2d      │    584 │ train │
+│[2m [0m[2m25 [0m[2m [0m│ net.net.encoder_blocks.2.conv.4 │ BatchNorm2d │     16 │ train │
+│[2m [0m[2m26 [0m[2m [0m│ net.net.encoder_blocks.2.conv.5 │ ReLU        │      0 │ train │
+│[2m [0m[2m27 [0m[2m [0m│ net.net.encoder_blocks.3        │ ConvBlock   │  3.6 K │ train │
+│[2m [0m[2m28 [0m[2m [0m│ net.net.encoder_blocks.3.conv   │ Sequential  │  3.6 K │ train │
+│[2m [0m[2m29 [0m[2m [0m│ net.net.encoder_blocks.3.conv.0 │ Conv2d      │  1.2 K │ train │
+│[2m [0m[2m30 [0m[2m [0m│ net.net.encoder_blocks.3.conv.1 │ BatchNorm2d │     32 │ train │
+│[2m [0m[2m31 [0m[2m [0m│ net.net.encoder_blocks.3.conv.2 │ ReLU        │      0 │ train │
+│[2m [0m[2m32 [0m[2m [0m│ net.net.encoder_blocks.3.conv.3 │ Conv2d      │  2.3 K │ train │
+│[2m [0m[2m33 [0m[2m [0m│ net.net.encoder_blocks.3.conv.4 │ BatchNorm2d │     32 │ train │
+│[2m [0m[2m34 [0m[2m [0m│ net.net.encoder_blocks.3.conv.5 │ ReLU        │      0 │ train │
+│[2m [0m[2m35 [0m[2m [0m│ net.net.pools                   │ ModuleList  │      0 │ train │
+│[2m [0m[2m36 [0m[2m [0m│ net.net.pools.0                 │ MaxPool2d   │      0 │ train │
+│[2m [0m[2m37 [0m[2m [0m│ net.net.pools.1                 │ MaxPool2d   │      0 │ train │
+│[2m [0m[2m38 [0m[2m [0m│ net.net.pools.2                 │ MaxPool2d   │      0 │ train │
+│[2m [0m[2m39 [0m[2m [0m│ net.net.pools.3                 │ MaxPool2d   │      0 │ train │
+│[2m [0m[2m40 [0m[2m [0m│ net.net.mid_conv_1              │ single_conv │  2.4 K │ train │
+│[2m [0m[2m41 [0m[2m [0m│ net.net.mid_conv_1.conv         │ Sequential  │  2.4 K │ train │
+│[2m [0m[2m42 [0m[2m [0m│ net.net.mid_conv_1.conv.0       │ Conv2d      │  2.3 K │ train │
+│[2m [0m[2m43 [0m[2m [0m│ net.net.mid_conv_1.conv.1       │ BatchNorm2d │     32 │ train │
+│[2m [0m[2m44 [0m[2m [0m│ net.net.mid_conv_1.conv.2       │ ReLU        │      0 │ train │
+│[2m [0m[2m45 [0m[2m [0m│ net.net.mid_conv_2              │ single_conv │    192 │ train │
+│[2m [0m[2m46 [0m[2m [0m│ net.net.mid_conv_2.conv         │ Sequential  │    192 │ train │
+│[2m [0m[2m47 [0m[2m [0m│ net.net.mid_conv_2.conv.0       │ Conv2d      │    160 │ train │
+│[2m [0m[2m48 [0m[2m [0m│ net.net.mid_conv_2.conv.1       │ BatchNorm2d │     32 │ train │
+│[2m [0m[2m49 [0m[2m [0m│ net.net.mid_conv_2.conv.2       │ ReLU        │      0 │ train │
+│[2m [0m[2m50 [0m[2m [0m│ net.net.mid_merge               │ ConvBlock   │  7.0 K │ train │
+│[2m [0m[2m51 [0m[2m [0m│ net.net.mid_merge.conv          │ Sequential  │  7.0 K │ train │
+│[2m [0m[2m52 [0m[2m [0m│ net.net.mid_merge.conv.0        │ Conv2d      │  4.6 K │ train │
+│[2m [0m[2m53 [0m[2m [0m│ net.net.mid_merge.conv.1        │ BatchNorm2d │     32 │ train │
+│[2m [0m[2m54 [0m[2m [0m│ net.net.mid_merge.conv.2        │ ReLU        │      0 │ train │
+│[2m [0m[2m55 [0m[2m [0m│ net.net.mid_merge.conv.3        │ Conv2d      │  2.3 K │ train │
+│[2m [0m[2m56 [0m[2m [0m│ net.net.mid_merge.conv.4        │ BatchNorm2d │     32 │ train │
+│[2m [0m[2m57 [0m[2m [0m│ net.net.mid_merge.conv.5        │ ReLU        │      0 │ train │
+│[2m [0m[2m58 [0m[2m [0m│ net.net.up_convs                │ ModuleList  │  6.2 K │ train │
+│[2m [0m[2m59 [0m[2m [0m│ net.net.up_convs.0              │ UpConv      │  4.7 K │ train │
+│[2m [0m[2m60 [0m[2m [0m│ net.net.up_convs.0.up           │ Sequential  │  4.7 K │ train │
+│[2m [0m[2m61 [0m[2m [0m│ net.net.up_convs.0.up.0         │ Upsample    │      0 │ train │
+│[2m [0m[2m62 [0m[2m [0m│ net.net.up_convs.0.up.1         │ Conv2d      │  4.6 K │ train │
+│[2m [0m[2m63 [0m[2m [0m│ net.net.up_convs.0.up.2         │ BatchNorm2d │     32 │ train │
+│[2m [0m[2m64 [0m[2m [0m│ net.net.up_convs.0.up.3         │ ReLU        │      0 │ train │
+│[2m [0m[2m65 [0m[2m [0m│ net.net.up_convs.1              │ UpConv      │  1.2 K │ train │
+│[2m [0m[2m66 [0m[2m [0m│ net.net.up_convs.1.up           │ Sequential  │  1.2 K │ train │
+│[2m [0m[2m67 [0m[2m [0m│ net.net.up_convs.1.up.0         │ Upsample    │      0 │ train │
+│[2m [0m[2m68 [0m[2m [0m│ net.net.up_convs.1.up.1         │ Conv2d      │  1.2 K │ train │
+│[2m [0m[2m69 [0m[2m [0m│ net.net.up_convs.1.up.2         │ BatchNorm2d │     16 │ train │
+│[2m [0m[2m70 [0m[2m [0m│ net.net.up_convs.1.up.3         │ ReLU        │      0 │ train │
+│[2m [0m[2m71 [0m[2m [0m│ net.net.up_convs.2              │ UpConv      │    300 │ train │
+│[2m [0m[2m72 [0m[2m [0m│ net.net.up_convs.2.up           │ Sequential  │    300 │ train │
+│[2m [0m[2m73 [0m[2m [0m│ net.net.up_convs.2.up.0         │ Upsample    │      0 │ train │
+│[2m [0m[2m74 [0m[2m [0m│ net.net.up_convs.2.up.1         │ Conv2d      │    292 │ train │
+│[2m [0m[2m75 [0m[2m [0m│ net.net.up_convs.2.up.2         │ BatchNorm2d │      8 │ train │
+│[2m [0m[2m76 [0m[2m [0m│ net.net.up_convs.2.up.3         │ ReLU        │      0 │ train │
+│[2m [0m[2m77 [0m[2m [0m│ net.net.up_convs.3              │ UpConv      │     78 │ train │
+│[2m [0m[2m78 [0m[2m [0m│ net.net.up_convs.3.up           │ Sequential  │     78 │ train │
+│[2m [0m[2m79 [0m[2m [0m│ net.net.up_convs.3.up.0         │ Upsample    │      0 │ train │
+│[2m [0m[2m80 [0m[2m [0m│ net.net.up_convs.3.up.1         │ Conv2d      │     74 │ train │
+│[2m [0m[2m81 [0m[2m [0m│ net.net.up_convs.3.up.2         │ BatchNorm2d │      4 │ train │
+│[2m [0m[2m82 [0m[2m [0m│ net.net.up_convs.3.up.3         │ ReLU        │      0 │ train │
+│[2m [0m[2m83 [0m[2m [0m│ net.net.decoder_blocks          │ ModuleList  │  9.4 K │ train │
+│[2m [0m[2m84 [0m[2m [0m│ net.net.decoder_blocks.0        │ ConvBlock   │  7.0 K │ train │
+│[2m [0m[2m85 [0m[2m [0m│ net.net.decoder_blocks.0.conv   │ Sequential  │  7.0 K │ train │
+│[2m [0m[2m86 [0m[2m [0m│ net.net.decoder_blocks.0.conv.0 │ Conv2d      │  4.6 K │ train │
+│[2m [0m[2m87 [0m[2m [0m│ net.net.decoder_blocks.0.conv.1 │ BatchNorm2d │     32 │ train │
+│[2m [0m[2m88 [0m[2m [0m│ net.net.decoder_blocks.0.conv.2 │ ReLU        │      0 │ train │
+│[2m [0m[2m89 [0m[2m [0m│ net.net.decoder_blocks.0.conv.3 │ Conv2d      │  2.3 K │ train │
+│[2m [0m[2m90 [0m[2m [0m│ net.net.decoder_blocks.0.conv.4 │ BatchNorm2d │     32 │ train │
+│[2m [0m[2m91 [0m[2m [0m│ net.net.decoder_blocks.0.conv.5 │ ReLU        │      0 │ train │
+│[2m [0m[2m92 [0m[2m [0m│ net.net.decoder_blocks.1        │ ConvBlock   │  1.8 K │ train │
+│[2m [0m[2m93 [0m[2m [0m│ net.net.decoder_blocks.1.conv   │ Sequential  │  1.8 K │ train │
+│[2m [0m[2m94 [0m[2m [0m│ net.net.decoder_blocks.1.conv.0 │ Conv2d      │  1.2 K │ train │
+│[2m [0m[2m95 [0m[2m [0m│ net.net.decoder_blocks.1.conv.1 │ BatchNorm2d │     16 │ train │
+│[2m [0m[2m96 [0m[2m [0m│ net.net.decoder_blocks.1.conv.2 │ ReLU        │      0 │ train │
+│[2m [0m[2m97 [0m[2m [0m│ net.net.decoder_blocks.1.conv.3 │ Conv2d      │    584 │ train │
+│[2m [0m[2m98 [0m[2m [0m│ net.net.decoder_blocks.1.conv.4 │ BatchNorm2d │     16 │ train │
+│[2m [0m[2m99 [0m[2m [0m│ net.net.decoder_blocks.1.conv.5 │ ReLU        │      0 │ train │
+│[2m [0m[2m100[0m[2m [0m│ net.net.decoder_blocks.2        │ ConvBlock   │    456 │ train │
+│[2m [0m[2m101[0m[2m [0m│ net.net.decoder_blocks.2.conv   │ Sequential  │    456 │ train │
+│[2m [0m[2m102[0m[2m [0m│ net.net.decoder_blocks.2.conv.0 │ Conv2d      │    292 │ train │
+│[2m [0m[2m103[0m[2m [0m│ net.net.decoder_blocks.2.conv.1 │ BatchNorm2d │      8 │ train │
+│[2m [0m[2m104[0m[2m [0m│ net.net.decoder_blocks.2.conv.2 │ ReLU        │      0 │ train │
+│[2m [0m[2m105[0m[2m [0m│ net.net.decoder_blocks.2.conv.3 │ Conv2d      │    148 │ train │
+│[2m [0m[2m106[0m[2m [0m│ net.net.decoder_blocks.2.conv.4 │ BatchNorm2d │      8 │ train │
+│[2m [0m[2m107[0m[2m [0m│ net.net.decoder_blocks.2.conv.5 │ ReLU        │      0 │ train │
+│[2m [0m[2m108[0m[2m [0m│ net.net.decoder_blocks.3        │ ConvBlock   │    120 │ train │
+│[2m [0m[2m109[0m[2m [0m│ net.net.decoder_blocks.3.conv   │ Sequential  │    120 │ train │
+│[2m [0m[2m110[0m[2m [0m│ net.net.decoder_blocks.3.conv.0 │ Conv2d      │     74 │ train │
+│[2m [0m[2m111[0m[2m [0m│ net.net.decoder_blocks.3.conv.1 │ BatchNorm2d │      4 │ train │
+│[2m [0m[2m112[0m[2m [0m│ net.net.decoder_blocks.3.conv.2 │ ReLU        │      0 │ train │
+│[2m [0m[2m113[0m[2m [0m│ net.net.decoder_blocks.3.conv.3 │ Conv2d      │     38 │ train │
+│[2m [0m[2m114[0m[2m [0m│ net.net.decoder_blocks.3.conv.4 │ BatchNorm2d │      4 │ train │
+│[2m [0m[2m115[0m[2m [0m│ net.net.decoder_blocks.3.conv.5 │ ReLU        │      0 │ train │
+│[2m [0m[2m116[0m[2m [0m│ net.net.final_decoder           │ ConvBlock   │    120 │ train │
+│[2m [0m[2m117[0m[2m [0m│ net.net.final_decoder.conv      │ Sequential  │    120 │ train │
+│[2m [0m[2m118[0m[2m [0m│ net.net.final_decoder.conv.0    │ Conv2d      │     74 │ train │
+│[2m [0m[2m119[0m[2m [0m│ net.net.final_decoder.conv.1    │ BatchNorm2d │      4 │ train │
+│[2m [0m[2m120[0m[2m [0m│ net.net.final_decoder.conv.2    │ ReLU        │      0 │ train │
+│[2m [0m[2m121[0m[2m [0m│ net.net.final_decoder.conv.3    │ Conv2d      │     38 │ train │
+│[2m [0m[2m122[0m[2m [0m│ net.net.final_decoder.conv.4    │ BatchNorm2d │      4 │ train │
+│[2m [0m[2m123[0m[2m [0m│ net.net.final_decoder.conv.5    │ ReLU        │      0 │ train │
+│[2m [0m[2m124[0m[2m [0m│ net.net.out_conv_R              │ Conv2d      │      3 │ train │
+│[2m [0m[2m125[0m[2m [0m│ net.net.out_conv_S              │ Conv2d      │     17 │ train │
+│[2m [0m[2m126[0m[2m [0m│ rad_denormalization             │ Normalize   │      0 │ train │
+│[2m [0m[2m127[0m[2m [0m│ sat_denormalization             │ Normalize   │      0 │ train │
+└─────┴─────────────────────────────────┴─────────────┴────────┴───────┘
+[1mTrainable params[0m: 30.0 K
+[1mNon-trainable params[0m: 0
+[1mTotal params[0m: 30.0 K
+[1mTotal estimated model params size (MB)[0m: 0
+[1mModules in train mode[0m: 128
+[1mModules in eval mode[0m: 0
+Epoch 4:  17%|▏| 5205/31462 [02:33<12:54, 33.89it/s, v_num=dh_0, train/rad=0.120, train/sat=2.380, train/mse=2.500, val/rad=1.970, val/sat=1.140, val/mse
+/home/radaric/.conda/envs/unet/lib/python3.10/site-packages/torch/optim/lr_scheduler.py:182: UserWarning: Detected call of `lr_scheduler.step()` before `optimizer.step()`. In PyTorch 1.1.0 and later, you should call them in the opposite order: `optimizer.step()` before `lr_scheduler.step()`.  Failure to do this will result in PyTorch skipping the first value of the learning rate schedule. See more details at https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
+  warnings.warn(
+Detected KeyboardInterrupt, attempting graceful shutdown ...
+Exception ignored in atexit callback: <function _start_and_connect_service.<locals>.teardown_atexit at 0x7fafa15b5360>
+Traceback (most recent call last):
+  File "/home/radaric/.conda/envs/unet/lib/python3.10/site-packages/wandb/sdk/lib/service_connection.py", line 90, in teardown_atexit
+    conn.teardown(hooks.exit_code)
+  File "/home/radaric/.conda/envs/unet/lib/python3.10/site-packages/wandb/sdk/lib/service_connection.py", line 218, in teardown
+    self._router.join()
+  File "/home/radaric/.conda/envs/unet/lib/python3.10/site-packages/wandb/sdk/interface/router.py", line 75, in join
+    self._thread.join()
+  File "/home/radaric/.conda/envs/unet/lib/python3.10/threading.py", line 1096, in join
+    self._wait_for_tstate_lock()
+  File "/home/radaric/.conda/envs/unet/lib/python3.10/threading.py", line 1116, in _wait_for_tstate_lock
+    if lock.acquire(block, timeout):
+KeyboardInterrupt:

checkpoint/Unet/wandb_logs/wandb/run-20250617_090527-0nx0l2dh/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,77 @@

+urllib3==2.4.0
+requests==2.32.4
+typing-inspection==0.4.1
+Jinja2==3.1.6
+MarkupSafe==3.0.2
+setuptools==78.1.1
+frozenlist==1.7.0
+aiosignal==1.3.2
+docstring_parser==0.16
+aiohappyeyeballs==2.6.1
+ClimaX==0.3.1
+platformdirs==4.3.8
+async-timeout==5.0.1
+nvidia-cusolver-cu12==11.7.1.2
+protobuf==6.31.1
+charset-normalizer==3.4.2
+attrs==25.3.0
+pip==25.1
+nvidia-cufile-cu12==1.11.1.6
+importlib_resources==6.5.2
+nvidia-nvjitlink-cu12==12.6.85
+numpy==2.2.6
+typeshed_client==2.7.0
+jsonargparse==4.40.0
+nvidia-cusparselt-cu12==0.6.3
+GitPython==3.1.44
+nvidia-cusparse-cu12==12.5.4.2
+mpmath==1.3.0
+pytorch-lightning==2.5.1.post0
+torchvision==0.22.1
+PyYAML==6.0.2
+nvidia-cudnn-cu12==9.5.1.17
+markdown-it-py==3.0.0
+typing_extensions==4.14.0
+smmap==5.0.2
+pydantic_core==2.33.2
+torchsummary==1.5.1
+nvidia-cublas-cu12==12.6.4.1
+FusionModel==0.3.1
+mdurl==0.1.2
+sentry-sdk==2.30.0
+nvidia-curand-cu12==10.3.7.77
+idna==3.10
+triton==3.3.1
+multidict==6.4.4
+Pygments==2.19.1
+nvidia-cuda-cupti-cu12==12.6.80
+tqdm==4.67.1
+psutil==7.0.0
+gitdb==4.0.12
+fsspec==2025.5.1
+pydantic==2.11.6
+sympy==1.14.0
+torchaudio==2.7.1
+nvidia-nccl-cu12==2.26.2
+propcache==0.3.2
+wandb==0.20.1
+filelock==3.18.0
+packaging==25.0
+nvidia-cuda-nvrtc-cu12==12.6.77
+networkx==3.4.2
+aiohttp==3.12.12
+nvidia-cufft-cu12==11.3.0.4
+nvidia-nvtx-cu12==12.6.77
+wheel==0.45.1
+yarl==1.20.1
+certifi==2025.4.26
+click==8.2.1
+nvidia-cuda-runtime-cu12==12.6.77
+rich==14.0.0
+pillow==11.2.1
+setproctitle==1.3.6
+torchmetrics==1.7.3
+lightning-utilities==0.14.3
+torch==2.7.1
+annotated-types==0.7.0
+ClimaX==0.3.1

checkpoint/Unet/wandb_logs/wandb/run-20250617_090527-0nx0l2dh/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,85 @@

+{
+  "os": "Linux-5.4.0-208-generic-x86_64-with-glibc2.31",
+  "python": "CPython 3.10.18",
+  "startedAt": "2025-06-17T09:05:28.174321Z",
+  "args": [
+    "--config",
+    "configs/Unet.yaml"
+  ],
+  "program": "/home/radaric/weather_forecast/Unet/src/train.py",
+  "codePath": "src/train.py",
+  "email": "weatherforecast1024hcmut@gmail.com",
+  "root": "checkpoint/Unet/wandb_logs",
+  "host": "u116613",
+  "executable": "/home/radaric/.conda/envs/unet/bin/python",
+  "codePathLocal": "src/train.py",
+  "cpu_count": 48,
+  "cpu_count_logical": 96,
+  "gpu": "NVIDIA RTX A6000",
+  "gpu_count": 7,
+  "disk": {
+    "/": {
+      "total": "1877998821376",
+      "used": "1470173900800"
+    }
+  },
+  "memory": {
+    "total": "540953096192"
+  },
+  "cpu": {
+    "count": 48,
+    "countLogical": 96
+  },
+  "gpu_nvidia": [
+    {
+      "name": "NVIDIA RTX A6000",
+      "memoryTotal": "51527024640",
+      "cudaCores": 10752,
+      "architecture": "Ampere",
+      "uuid": "GPU-fb5a2de4-c79a-f2d0-a864-a6271ad28ae6"
+    },
+    {
+      "name": "NVIDIA RTX A6000",
+      "memoryTotal": "51527024640",
+      "cudaCores": 10752,
+      "architecture": "Ampere",
+      "uuid": "GPU-1a8c199b-93ca-3fec-6459-a5515bf1b12b"
+    },
+    {
+      "name": "NVIDIA RTX A6000",
+      "memoryTotal": "51527024640",
+      "cudaCores": 10752,
+      "architecture": "Ampere",
+      "uuid": "GPU-4d0c0cac-f72d-9dc7-9ac0-60cf8803134b"
+    },
+    {
+      "name": "NVIDIA RTX A6000",
+      "memoryTotal": "51527024640",
+      "cudaCores": 10752,
+      "architecture": "Ampere",
+      "uuid": "GPU-2887d599-b7bf-d31f-4425-84fa60413306"
+    },
+    {
+      "name": "NVIDIA RTX A6000",
+      "memoryTotal": "51527024640",
+      "cudaCores": 10752,
+      "architecture": "Ampere",
+      "uuid": "GPU-86e7c8f1-cde6-4163-dc15-52cef50545bd"
+    },
+    {
+      "name": "NVIDIA RTX A6000",
+      "memoryTotal": "51527024640",
+      "cudaCores": 10752,
+      "architecture": "Ampere",
+      "uuid": "GPU-460d754a-f551-6943-c142-b5b8f2f86236"
+    },
+    {
+      "name": "NVIDIA RTX A6000",
+      "memoryTotal": "51527024640",
+      "cudaCores": 10752,
+      "architecture": "Ampere",
+      "uuid": "GPU-553ca63b-335c-4c11-94eb-29c777adb307"
+    }
+  ],
+  "cudaVersion": "12.3"
+}

checkpoint/Unet/wandb_logs/wandb/run-20250617_090527-0nx0l2dh/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,7 @@

+{"time":"2025-06-17T09:05:27.98855733Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpng05fvru/port-1311468.txt","pid":1311468,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-06-17T09:05:27.99003933Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":1311468}
+{"time":"2025-06-17T09:05:27.99004801Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":46731,"Zone":""}}
+{"time":"2025-06-17T09:05:28.169034214Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:38220"}
+{"time":"2025-06-17T09:05:28.178996979Z","level":"INFO","msg":"handleInformInit: received","streamId":"0nx0l2dh","id":"127.0.0.1:38220"}
+{"time":"2025-06-17T09:05:29.423327647Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"0nx0l2dh","id":"127.0.0.1:38220"}
+{"time":"2025-06-17T10:10:24.148447187Z","level":"INFO","msg":"Parent process exited, terminating service process."}

checkpoint/Unet/wandb_logs/wandb/run-20250617_090527-0nx0l2dh/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,7 @@

+{"time":"2025-06-17T09:05:28.179242652Z","level":"INFO","msg":"stream: starting","core version":"0.20.1","symlink path":"checkpoint/Unet/wandb_logs/wandb/run-20250617_090527-0nx0l2dh/logs/debug-core.log"}
+{"time":"2025-06-17T09:05:29.423278937Z","level":"INFO","msg":"stream: created new stream","id":"0nx0l2dh"}
+{"time":"2025-06-17T09:05:29.423321777Z","level":"INFO","msg":"stream: started","id":"0nx0l2dh"}
+{"time":"2025-06-17T09:05:29.423393558Z","level":"INFO","msg":"sender: started","stream_id":"0nx0l2dh"}
+{"time":"2025-06-17T09:05:29.423393088Z","level":"INFO","msg":"writer: Do: started","stream_id":"0nx0l2dh"}
+{"time":"2025-06-17T09:05:29.423465179Z","level":"INFO","msg":"handler: started","stream_id":"0nx0l2dh"}
+{"time":"2025-06-17T09:05:30.100696875Z","level":"INFO","msg":"Starting system monitor"}

checkpoint/Unet/wandb_logs/wandb/run-20250617_090527-0nx0l2dh/logs/debug.log ADDED Viewed

	@@ -0,0 +1,22 @@

+2025-06-17 09:05:27,961 INFO    MainThread:1311468 [wandb_setup.py:_flush():81] Current SDK version is 0.20.1
+2025-06-17 09:05:27,961 INFO    MainThread:1311468 [wandb_setup.py:_flush():81] Configure stats pid to 1311468
+2025-06-17 09:05:27,961 INFO    MainThread:1311468 [wandb_setup.py:_flush():81] Loading settings from /home/radaric/.config/wandb/settings
+2025-06-17 09:05:27,961 INFO    MainThread:1311468 [wandb_setup.py:_flush():81] Loading settings from /home/radaric/weather_forecast/Unet/wandb/settings
+2025-06-17 09:05:27,961 INFO    MainThread:1311468 [wandb_setup.py:_flush():81] Loading settings from environment variables
+2025-06-17 09:05:27,961 INFO    MainThread:1311468 [wandb_init.py:setup_run_log_directory():703] Logging user logs to checkpoint/Unet/wandb_logs/wandb/run-20250617_090527-0nx0l2dh/logs/debug.log
+2025-06-17 09:05:27,961 INFO    MainThread:1311468 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to checkpoint/Unet/wandb_logs/wandb/run-20250617_090527-0nx0l2dh/logs/debug-internal.log
+2025-06-17 09:05:27,961 INFO    MainThread:1311468 [wandb_init.py:init():831] calling init triggers
+2025-06-17 09:05:27,961 INFO    MainThread:1311468 [wandb_init.py:init():836] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-06-17 09:05:27,961 INFO    MainThread:1311468 [wandb_init.py:init():872] starting backend
+2025-06-17 09:05:28,169 INFO    MainThread:1311468 [wandb_init.py:init():875] sending inform_init request
+2025-06-17 09:05:28,174 INFO    MainThread:1311468 [wandb_init.py:init():883] backend started and connected
+2025-06-17 09:05:28,175 INFO    MainThread:1311468 [wandb_init.py:init():956] updated telemetry
+2025-06-17 09:05:28,175 INFO    MainThread:1311468 [wandb_init.py:init():980] communicating run to backend with 90.0 second timeout
+2025-06-17 09:05:30,098 INFO    MainThread:1311468 [wandb_init.py:init():1032] starting run threads in backend
+2025-06-17 09:05:30,175 INFO    MainThread:1311468 [wandb_run.py:_console_start():2453] atexit reg
+2025-06-17 09:05:30,175 INFO    MainThread:1311468 [wandb_run.py:_redirect():2301] redirect: wrap_raw
+2025-06-17 09:05:30,175 INFO    MainThread:1311468 [wandb_run.py:_redirect():2370] Wrapping output streams.
+2025-06-17 09:05:30,175 INFO    MainThread:1311468 [wandb_run.py:_redirect():2393] Redirects installed.
+2025-06-17 09:05:30,177 INFO    MainThread:1311468 [wandb_init.py:init():1078] run started, returning control to user process
+2025-06-17 09:05:31,151 INFO    MainThread:1311468 [wandb_run.py:_config_callback():1358] config_cb None None {'pretrained_path': '', 'lr': 0.0005, 'beta_1': 0.9, 'beta_2': 0.99, 'weight_decay': 1e-05, 'warmup_epochs': 10, 'max_epochs': 50, 'warmup_start_lr': 1e-08, 'eta_min': 1e-08, '_instantiator': 'pytorch_lightning.cli.instantiate_module', 'dir_data': '/data/weather2025/NhaBe/', 'batch_size': 1, 'hours_predicted': 3, 'num_workers': 4, 'pin_memory': False, 'time_points_rad': 1, 'time_points_sat': 1, 'sat_inp_vars': 'total_precipitation', 'sat_out_vars': 'total_precipitation', 'sat_size': 25, 'rad_inp_vars': 'precipitation', 'rad_out_vars': 'precipitation', 'rad_size': 400, 'ablation': 'no'}

checkpoint/Unet/wandb_logs/wandb/run-20250617_090527-0nx0l2dh/run-0nx0l2dh.wandb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:72554d0fd15b4685f86d20808dfa02fa74043afb99ece44f6b8184bd0a6f9bfc
+size 66093056

configs/AttR2Unet.yaml ADDED Viewed

	@@ -0,0 +1,86 @@

+seed_everything: 42
+# ---------------------------- TRAINER -------------------------------------------
+trainer:
+  default_root_dir: "checkpoint/AttR2Unet"
+  precision: "16-mixed"
+  min_epochs: 1
+  max_epochs: 100
+  accelerator: cuda
+  # limit_train_batches: 10
+  devices: [6]
+  # strategy: ddp
+  num_nodes: 1
+  enable_progress_bar: true
+  sync_batchnorm: True
+  enable_checkpointing: True
+  # debugging
+  fast_dev_run: false
+  logger:
+    class_path: pytorch_lightning.loggers.CSVLogger
+    init_args:
+      save_dir: "checkpoint/AttR2Unet/logs"
+      name: null
+      version: null
+  callbacks:
+    - class_path: pytorch_lightning.callbacks.LearningRateMonitor
+      init_args:
+        logging_interval: "step"
+    - class_path: pytorch_lightning.callbacks.ModelCheckpoint
+      init_args:
+        dirpath: "checkpoint/AttR2Unet/checkpoints"
+        monitor: "val/mse" # name of the logged metric which determines when model is improving
+        mode: "min" # "min" means lower metric value is better, can also be "max"
+        save_top_k: 1 # save k best models (determined by above metric)
+        save_last: True # additionally always save model from last epoch
+        verbose: False
+        filename: "epoch_{epoch:03d}"
+        auto_insert_metric_name: False
+    - class_path: pytorch_lightning.callbacks.EarlyStopping
+      init_args:
+        monitor: "val/mse" # name of the logged metric which determines when model is improving
+        mode: "min" # "min" means lower metric value is better, can also be "max"
+        patience: 10 # how many validation epochs of not improving until training stops
+        min_delta: 0. # minimum change in the monitored metric needed to qualify as an improvement
+    - class_path: pytorch_lightning.callbacks.RichModelSummary
+      init_args:
+        max_depth: -1
+    - class_path: pytorch_lightning.callbacks.RichProgressBar
+# ---------------------------- MODEL -------------------------------------------
+model:
+  pretrained_path: ""
+  beta_1: 0.9
+  beta_2: 0.99
+  lr: 5e-4
+  weight_decay: 1e-5
+  warmup_epochs: 10
+  max_epochs: 50
+  warmup_start_lr: 1e-8
+  eta_min: 1e-8
+  net:
+    model_type: "AttR2Unet"
+    num_channel: 1
+# ---------------------------- DATA -------------------------------------------
+data:
+  dir_data: "/data/data_WF/ablation/ablation_time"
+  ablation: "time"
+  sat_size: 20
+  rad_size: 640
+  time_points_rad: 1
+  time_points_sat: 1
+  sat_inp_vars: ["total_precipitation"]
+  sat_out_vars: "total_precipitation"
+  rad_inp_vars: ["precipitation"]
+  rad_out_vars: "precipitation"
+  hours_predicted: 3
+  batch_size: 32
+  num_workers: 4
+  pin_memory: False

configs/AttUnet.yaml ADDED Viewed

	@@ -0,0 +1,86 @@

+seed_everything: 42
+# ---------------------------- TRAINER -------------------------------------------
+trainer:
+  default_root_dir: "checkpoint/AttUnet"
+  precision: "16-mixed"
+  min_epochs: 1
+  max_epochs: 100
+  accelerator: cuda
+  # limit_train_batches: 10
+  devices: [5]
+  # strategy: ddp
+  num_nodes: 1
+  enable_progress_bar: true
+  sync_batchnorm: True
+  enable_checkpointing: True
+  # debugging
+  fast_dev_run: false
+  logger:
+    class_path: pytorch_lightning.loggers.CSVLogger
+    init_args:
+      save_dir: "checkpoint/AttUnet/logs"
+      name: null
+      version: null
+  callbacks:
+    - class_path: pytorch_lightning.callbacks.LearningRateMonitor
+      init_args:
+        logging_interval: "step"
+    - class_path: pytorch_lightning.callbacks.ModelCheckpoint
+      init_args:
+        dirpath: "checkpoint/AttUnet/checkpoints"
+        monitor: "val/mse" # name of the logged metric which determines when model is improving
+        mode: "min" # "min" means lower metric value is better, can also be "max"
+        save_top_k: 1 # save k best models (determined by above metric)
+        save_last: True # additionally always save model from last epoch
+        verbose: False
+        filename: "epoch_{epoch:03d}"
+        auto_insert_metric_name: False
+    - class_path: pytorch_lightning.callbacks.EarlyStopping
+      init_args:
+        monitor: "val/mse" # name of the logged metric which determines when model is improving
+        mode: "min" # "min" means lower metric value is better, can also be "max"
+        patience: 10 # how many validation epochs of not improving until training stops
+        min_delta: 0. # minimum change in the monitored metric needed to qualify as an improvement
+    - class_path: pytorch_lightning.callbacks.RichModelSummary
+      init_args:
+        max_depth: -1
+    - class_path: pytorch_lightning.callbacks.RichProgressBar
+# ---------------------------- MODEL -------------------------------------------
+model:
+  pretrained_path: ""
+  beta_1: 0.9
+  beta_2: 0.99
+  lr: 5e-4
+  weight_decay: 1e-5
+  warmup_epochs: 10
+  max_epochs: 50
+  warmup_start_lr: 1e-8
+  eta_min: 1e-8
+  net:
+    model_type: "AttUnet"
+    num_channel: 1
+# ---------------------------- DATA -------------------------------------------
+data:
+  dir_data: "/data/data_WF/ablation/ablation_time"
+  ablation: "time"
+  sat_size: 20
+  rad_size: 640
+  time_points_rad: 1
+  time_points_sat: 1
+  sat_inp_vars: ["total_precipitation"]
+  sat_out_vars: "total_precipitation"
+  rad_inp_vars: ["precipitation"]
+  rad_out_vars: "precipitation"
+  hours_predicted: 3
+  batch_size: 32
+  num_workers: 4
+  pin_memory: False

configs/Nothing.yaml ADDED Viewed

	@@ -0,0 +1,86 @@

+seed_everything: 42
+# ---------------------------- TRAINER -------------------------------------------
+trainer:
+  default_root_dir: "checkpoint/Nothing"
+  precision: "16-mixed"
+  min_epochs: 1
+  max_epochs: 100
+  accelerator: cuda
+  # limit_train_batches: 10
+  devices: [4]
+  # strategy: ddp
+  num_nodes: 1
+  enable_progress_bar: true
+  sync_batchnorm: True
+  enable_checkpointing: True
+  # debugging
+  fast_dev_run: false
+  logger:
+    class_path: pytorch_lightning.loggers.CSVLogger
+    init_args:
+      save_dir: "checkpoint/Nothing/logs"
+      name: null
+      version: null
+  callbacks:
+    - class_path: pytorch_lightning.callbacks.LearningRateMonitor
+      init_args:
+        logging_interval: "step"
+    - class_path: pytorch_lightning.callbacks.ModelCheckpoint
+      init_args:
+        dirpath: "checkpoint/Nothing/checkpoints"
+        monitor: "val/sat" # name of the logged metric which determines when model is improving
+        mode: "min" # "min" means lower metric value is better, can also be "max"
+        save_top_k: 1 # save k best models (determined by above metric)
+        save_last: True # additionally always save model from last epoch
+        verbose: False
+        filename: "epoch_{epoch:03d}"
+        auto_insert_metric_name: False
+    - class_path: pytorch_lightning.callbacks.EarlyStopping
+      init_args:
+        monitor: "val/sat" # name of the logged metric which determines when model is improving
+        mode: "min" # "min" means lower metric value is better, can also be "max"
+        patience: 10 # how many validation epochs of not improving until training stops
+        min_delta: 0. # minimum change in the monitored metric needed to qualify as an improvement
+    - class_path: pytorch_lightning.callbacks.RichModelSummary
+      init_args:
+        max_depth: -1
+    - class_path: pytorch_lightning.callbacks.RichProgressBar
+# ---------------------------- MODEL -------------------------------------------
+model:
+  pretrained_path: ""
+  beta_1: 0.9
+  beta_2: 0.99
+  lr: 5e-4
+  weight_decay: 1e-5
+  warmup_epochs: 10
+  max_epochs: 50
+  warmup_start_lr: 1e-8
+  eta_min: 1e-8
+  net:
+    model_type: "Nothing"
+    num_channel: 1
+# ---------------------------- DATA -------------------------------------------
+data:
+  dir_data: "/data/data_WF/ablation/ablation_time"
+  ablation: "time"
+  sat_size: 20
+  rad_size: 640
+  time_points_rad: 1
+  time_points_sat: 1
+  sat_inp_vars: ["total_precipitation"]
+  sat_out_vars: "total_precipitation"
+  rad_inp_vars: ["precipitation"]
+  rad_out_vars: "precipitation"
+  hours_predicted: 3
+  batch_size: 8
+  num_workers: 4
+  pin_memory: False

configs/R2Unet.yaml ADDED Viewed

	@@ -0,0 +1,95 @@

+seed_everything: 42
+# ---------------------------- TRAINER -------------------------------------------
+trainer:
+  default_root_dir: "checkpoint/R2Unet"
+  precision: "16-mixed"
+  min_epochs: 1
+  max_epochs: 100
+  accelerator: cuda
+  # limit_train_batches: 10
+  devices: [4]
+  # strategy: ddp
+  num_nodes: 1
+  enable_progress_bar: true
+  sync_batchnorm: True
+  enable_checkpointing: True
+  # debugging
+  fast_dev_run: false
+  logger:
+    class_path: pytorch_lightning.loggers.CSVLogger
+    init_args:
+      save_dir: "checkpoint/R2Unet/logs"
+      name: null
+      version: null
+  callbacks:
+    - class_path: pytorch_lightning.callbacks.LearningRateMonitor
+      init_args:
+        logging_interval: "step"
+    - class_path: pytorch_lightning.callbacks.ModelCheckpoint
+      init_args:
+        dirpath: "checkpoint/R2Unet/checkpoints"
+        monitor: "val/mse" # name of the logged metric which determines when model is improving
+        mode: "min" # "min" means lower metric value is better, can also be "max"
+        save_top_k: 1 # save k best models (determined by above metric)
+        save_last: True # additionally always save model from last epoch
+        verbose: False
+        filename: "epoch_{epoch:03d}"
+        auto_insert_metric_name: False
+    - class_path: pytorch_lightning.callbacks.EarlyStopping
+      init_args:
+        monitor: "val/mse" # name of the logged metric which determines when model is improving
+        mode: "min" # "min" means lower metric value is better, can also be "max"
+        patience: 10 # how many validation epochs of not improving until training stops
+        min_delta: 0. # minimum change in the monitored metric needed to qualify as an improvement
+    - class_path: pytorch_lightning.callbacks.RichModelSummary
+      init_args:
+        max_depth: -1
+    - class_path: pytorch_lightning.callbacks.RichProgressBar
+      init_args:
+        theme:
+          description: "white"
+          progress_bar: "#6206E0"
+          progress_bar_finished: "green"
+          progress_bar_pulse: "cyan"
+          batch_progress: "white"
+          time: "grey42"
+          processing_speed: "grey70"
+          metrics: "white"
+# ---------------------------- MODEL -------------------------------------------
+model:
+  pretrained_path: ""
+  beta_1: 0.9
+  beta_2: 0.99
+  lr: 5e-4
+  weight_decay: 1e-5
+  warmup_epochs: 10
+  max_epochs: 50
+  warmup_start_lr: 1e-8
+  eta_min: 1e-8
+  net:
+    model_type: "R2Unet"
+    num_channel: 1
+# ---------------------------- DATA -------------------------------------------
+data:
+  dir_data: "/data/data_WF/ablation/ablation_time"
+  ablation: "time"
+  sat_size: 20
+  rad_size: 640
+  time_points_rad: 1
+  time_points_sat: 1
+  sat_inp_vars: ["total_precipitation"]
+  sat_out_vars: "total_precipitation"
+  rad_inp_vars: ["precipitation"]
+  rad_out_vars: "precipitation"
+  hours_predicted: 3
+  batch_size: 32
+  num_workers: 4
+  pin_memory: False

configs/Unet.yaml ADDED Viewed

	@@ -0,0 +1,104 @@

+seed_everything: 42
+# ---------------------------- TRAINER -------------------------------------------
+trainer:
+  default_root_dir: "checkpoint/Unet"
+  precision: "16-mixed"
+  min_epochs: 1
+  max_epochs: 100
+  accelerator: cuda
+  # limit_train_batches: 10
+  devices: [5]
+  # strategy: ddp
+  num_nodes: 1
+  enable_progress_bar: true
+  sync_batchnorm: True
+  enable_checkpointing: True
+  # debugging
+  fast_dev_run: false
+  logger:
+    - class_path: pytorch_lightning.loggers.WandbLogger
+      init_args:
+        project: "NhaBe"
+        name: "UnetNhaBe"
+        save_dir: "checkpoint/Unet/wandb_logs"
+        log_model: False
+    - class_path: pytorch_lightning.loggers.CSVLogger
+      init_args:
+        save_dir: "checkpoint/Unet/csv_logs"
+        name: null
+        version: null
+  callbacks:
+    - class_path: pytorch_lightning.callbacks.LearningRateMonitor
+      init_args:
+        logging_interval: "step"
+    - class_path: pytorch_lightning.callbacks.ModelCheckpoint
+      init_args:
+        dirpath: "checkpoint/Unet/checkpoints"
+        monitor: "val/mse" # name of the logged metric which determines when model is improving
+        mode: "min" # "min" means lower metric value is better, can also be "max"
+        save_top_k: 1 # save k best models (determined by above metric)
+        save_last: True # additionally always save model from last epoch
+        verbose: False
+        filename: "epoch_{epoch:03d}"
+        auto_insert_metric_name: False
+    - class_path: pytorch_lightning.callbacks.EarlyStopping
+      init_args:
+        monitor: "val/mse" # name of the logged metric which determines when model is improving
+        mode: "min" # "min" means lower metric value is better, can also be "max"
+        patience: 10 # how many validation epochs of not improving until training stops
+        min_delta: 0. # minimum change in the monitored metric needed to qualify as an improvement
+    - class_path: pytorch_lightning.callbacks.RichModelSummary
+      init_args:
+        max_depth: -1
+    # - class_path: pytorch_lightning.callbacks.RichProgressBar
+    #   init_args:
+    #     theme:
+    #       description: "white"
+    #       progress_bar: "#6206E0"
+    #       progress_bar_finished: "green"
+    #       progress_bar_pulse: "cyan"
+    #       batch_progress: "white"
+    #       time: "grey42"
+    #       processing_speed: "grey70"
+    #       metrics: "white"
+# ---------------------------- MODEL -------------------------------------------
+model:
+  pretrained_path: ""
+  beta_1: 0.9
+  beta_2: 0.99
+  lr: 5e-4
+  weight_decay: 1e-5
+  warmup_epochs: 10
+  max_epochs: 50
+  warmup_start_lr: 1e-8
+  eta_min: 1e-8
+  net:
+    model_type: "Unet"
+    rad_channel: 1
+    sat_channel: 1
+    rad_size: 400
+    sat_size: 25
+# ---------------------------- DATA -------------------------------------------
+data:
+  dir_data: "/data/weather2025/NhaBe/"
+  ablation: "no"
+  rad_size: 400
+  sat_size: 25
+  time_points_rad: 1
+  time_points_sat: 1
+  sat_inp_vars: "total_precipitation"
+  sat_out_vars: "total_precipitation"
+  rad_inp_vars: "precipitation"
+  rad_out_vars: "precipitation"
+  hours_predicted: 3
+  batch_size: 1
+  num_workers: 8
+  pin_memory: False

pyproject.toml ADDED Viewed

	@@ -0,0 +1,21 @@

+[build-system]
+requires = ["setuptools", "setuptools-scm"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "FusionModel"
+version = "0.3.1"
+authors =[
+    {name="Khanh Vinh Bui", email="khanhvinhbui0512@gmail.com"},
+    {name="Hong Trang Le", email="lhtrang@hcmut.edu.vn"}
+]
+description = ""
+readme = "README.md"
+requires-python = ">=3.10"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+]
+[tool.setuptools.packages.find]
+where = ["."]

src/__pycache__/arch.cpython-310.pyc ADDED Viewed

Binary file (12.5 kB). View file

src/__pycache__/arch.cpython-312.pyc ADDED Viewed

Binary file (27.2 kB). View file

src/__pycache__/arch.cpython-38.pyc ADDED Viewed

Binary file (12.8 kB). View file

src/__pycache__/datamodule.cpython-310.pyc ADDED Viewed

Binary file (9.34 kB). View file

src/__pycache__/datamodule.cpython-312.pyc ADDED Viewed

Binary file (23.8 kB). View file

src/__pycache__/lr_scheduler.cpython-310.pyc ADDED Viewed

Binary file (3.73 kB). View file

src/__pycache__/lr_scheduler.cpython-312.pyc ADDED Viewed

Binary file (5.55 kB). View file

src/__pycache__/metric.cpython-310.pyc ADDED Viewed

Binary file (1.74 kB). View file

src/__pycache__/metric.cpython-312.pyc ADDED Viewed

Binary file (3.67 kB). View file

src/__pycache__/module.cpython-310.pyc ADDED Viewed

Binary file (6.35 kB). View file

src/__pycache__/module.cpython-312.pyc ADDED Viewed

Binary file (11.4 kB). View file

src/__pycache__/module.cpython-38.pyc ADDED Viewed

Binary file (6.22 kB). View file

src/__pycache__/train.cpython-38.pyc ADDED Viewed

Binary file (983 Bytes). View file

src/arch.py ADDED Viewed

	@@ -0,0 +1,473 @@

+import os
+import numpy as np
+import glob
+import math
+import torch
+import torchvision
+# For everything
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn import CrossEntropyLoss, Linear, MSELoss
+from torch.nn import ConvTranspose2d, Conv2d, MaxPool2d, BatchNorm2d
+# For our model
+import torchvision.models as models
+from torchvision import datasets, transforms
+from torchvision.io import read_image
+from torch.utils.data import DataLoader, Dataset
+import torch.optim as optim
+from torch.autograd import Variable
+from torchsummary import summary
+class Nothing(nn.Module):
+    """Pass-through module: returns its two inputs unchanged and defines no layers."""
+    def __init__(self):
+        super(Nothing,self).__init__()
+    def forward(self, radar,satellite):
+        """Return ``(radar, satellite)`` exactly as received."""
+        return radar, satellite
+class ConvBlock(nn.Module):
+    """Two stacked 3x3 Conv2d -> BatchNorm2d -> ReLU stages.
+
+    :param in_channels: number of channels of the input feature map
+    :param out_channels: number of channels produced by both convolutions
+    """
+    def __init__(self, in_channels, out_channels):
+        super(ConvBlock, self).__init__()
+        # number of input channels is a number of filters in the previous layer
+        # number of output channels is a number of filters in the current layer
+        # "same" convolutions
+        self.conv = nn.Sequential(
+            # first stage maps in_channels -> out_channels
+            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding='same', bias=True),
+            nn.BatchNorm2d(out_channels),
+            nn.ReLU(inplace=True),
+            # second stage keeps the channel count at out_channels
+            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding='same', bias=True),
+            nn.BatchNorm2d(out_channels),
+            nn.ReLU(inplace=True)
+        )
+    def forward(self, x):
+        """Run ``x`` through the two conv stages and return the result."""
+        x = self.conv(x)
+        return x
+class UpConv(nn.Module):
+    """Upsample by a factor of 2, then apply 3x3 Conv2d -> BatchNorm2d -> ReLU.
+
+    :param in_channels: number of channels of the input feature map
+    :param out_channels: number of channels produced by the convolution
+    """
+    def __init__(self, in_channels, out_channels):
+        super(UpConv, self).__init__()
+        self.up = nn.Sequential(
+            # nn.Upsample is used with its default interpolation mode
+            nn.Upsample(scale_factor=2),
+            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding='same', bias=True),
+            nn.BatchNorm2d(out_channels),
+            nn.ReLU(inplace=True)
+        )
+    def forward(self, x):
+        """Return the upsampled and convolved version of ``x``."""
+        x = self.up(x)
+        return x
+class AttentionBlock(nn.Module):
+    """Attention block with learnable parameters"""
+    def __init__(self, F_g, F_l, n_coefficients):
+        """
+        :param F_g: number of feature maps (channels) in previous layer
+        :param F_l: number of feature maps in corresponding encoder layer, transferred via skip connection
+        :param n_coefficients: number of learnable multi-dimensional attention coefficients
+        """
+        super(AttentionBlock, self).__init__()
+        # 1x1 projection of the gating signal to n_coefficients channels
+        self.W_gate = nn.Sequential(
+            nn.Conv2d(F_g, n_coefficients, kernel_size=1, stride=1, padding=0, bias=True),
+            nn.BatchNorm2d(n_coefficients)
+        )
+        # 1x1 projection of the skip connection to n_coefficients channels
+        self.W_x = nn.Sequential(
+            nn.Conv2d(F_l, n_coefficients, kernel_size=1, stride=1, padding=0, bias=True),
+            nn.BatchNorm2d(n_coefficients)
+        )
+        # collapses the combined features to one channel and squashes it with a sigmoid
+        self.psi = nn.Sequential(
+            nn.Conv2d(n_coefficients, 1, kernel_size=1, stride=1, padding=0, bias=True),
+            nn.BatchNorm2d(1),
+            nn.Sigmoid()
+        )
+        self.relu = nn.ReLU(inplace=True)
+    def forward(self, gate, skip_connection):
+        """
+        :param gate: gating signal from previous layer
+        :param skip_connection: activation from corresponding encoder layer
+        :return: output activations
+        """
+        g1 = self.W_gate(gate)
+        x1 = self.W_x(skip_connection)
+        # NOTE(review): the sum assumes gate and skip_connection have compatible
+        # batch/spatial sizes -- presumably guaranteed by the caller; confirm.
+        psi = self.relu(g1 + x1)
+        psi = self.psi(psi)
+        # psi has a single channel, so it scales every channel of the skip connection
+        out = skip_connection * psi
+        return out
+class Recurrent_block(nn.Module):
+    """Recurrent convolution: one shared conv/BN/ReLU stack re-applied to ``x`` plus its previous output."""
+    def __init__(self,ch_out,t=2):
+        """
+        :param ch_out: channel count of both the input and the output of the shared convolution
+        :param t: number of recurrent refinement steps performed after the initial convolution
+        """
+        super(Recurrent_block,self).__init__()
+        self.t = t
+        self.ch_out = ch_out
+        self.conv = nn.Sequential(
+            nn.Conv2d(ch_out,ch_out,kernel_size=3,stride=1,padding='same',bias=True),
+            nn.BatchNorm2d(ch_out),
+            nn.ReLU(inplace=True)
+        )
+    def forward(self,x):
+        """Return the result of one initial convolution followed by ``t`` passes of ``conv(x + previous)``."""
+        # The initial pass is hoisted out of the loop so ``x1`` is always bound;
+        # previously ``t == 0`` fell through to ``return x1`` and raised UnboundLocalError.
+        # For ``t >= 1`` the sequence of convolutions is unchanged.
+        x1 = self.conv(x)
+        for _ in range(self.t):
+            x1 = self.conv(x+x1)
+        return x1
+class RRCNN_block(nn.Module):
+    """Recurrent residual block: 1x1 channel projection plus two stacked ``Recurrent_block`` units."""
+    def __init__(self,ch_in,ch_out,t=2):
+        """
+        :param ch_in: channel count of the incoming feature map
+        :param ch_out: channel count after the 1x1 projection (and of the output)
+        :param t: recurrence count handed to each ``Recurrent_block``
+        """
+        super().__init__()
+        # RCNN is registered before Conv_1x1, matching the original parameter order.
+        recurrent_units = [Recurrent_block(ch_out,t=t) for _ in range(2)]
+        self.RCNN = nn.Sequential(*recurrent_units)
+        self.Conv_1x1 = nn.Conv2d(ch_in,ch_out,kernel_size=1,stride=1,padding='same')
+    def forward(self,x):
+        """Project ``x`` to ``ch_out`` channels and add the recurrent branch as a residual."""
+        projected = self.Conv_1x1(x)
+        return projected + self.RCNN(projected)
+class single_conv(nn.Module):
+    """One 3x3 'same' convolution followed by batch norm and ReLU."""
+    def __init__(self,ch_in,ch_out):
+        """
+        :param ch_in: channel count of the input
+        :param ch_out: channel count of the output
+        """
+        super().__init__()
+        layers = [
+            nn.Conv2d(ch_in, ch_out, kernel_size=3,stride=1,padding='same',bias=True),
+            nn.BatchNorm2d(ch_out),
+            nn.ReLU(inplace=True),
+        ]
+        self.conv = nn.Sequential(*layers)
+    def forward(self,x):
+        """Return ``x`` passed through the conv/BN/ReLU stack."""
+        return self.conv(x)
+class Unet(nn.Module):
+    """U-Net that pools a radar field down by ``rad_size / sat_size``, fuses it with the
+    satellite field at that resolution, and decodes back to the radar resolution.
+    The channel width doubles at every encoder stage, starting from ``rad_channel``.
+    """
+    def __init__(self, rad_channel=1,sat_channel=1, rad_size=640, sat_size=20):
+        """
+        :param rad_channel: radar input/output channel count; also the base width of the network
+        :param sat_channel: satellite input/output channel count
+        :param rad_size: radar size; only its ratio to ``sat_size`` is used (sets the number of 2x poolings)
+        :param sat_size: satellite size
+        :raises AssertionError: if ``rad_size`` is not a power-of-two multiple of ``sat_size``
+        :raises ValueError: if ``rad_size`` is smaller than twice ``sat_size``
+        """
+        super(Unet, self).__init__()
+        assert rad_size % sat_size == 0, "rad_size must be divisible by sat_size"
+        ratio = rad_size // sat_size
+        assert (ratio & (ratio - 1)) == 0, "rad_size/sat_size must be a power of 2"
+        # With a ratio below 2 no encoder stage would be built and the bottleneck width
+        # was never defined (UnboundLocalError on ``out_c``); fail with a clear message instead.
+        if ratio < 2:
+            raise ValueError("rad_size must be at least twice sat_size")
+        self.n_pool = int(math.log2(ratio))
+        # Encoder: one ConvBlock and one 2x max-pool per stage.
+        self.encoder_blocks = nn.ModuleList()
+        self.pools = nn.ModuleList()
+        for i in range(self.n_pool):
+            in_c = rad_channel * (2**(i))
+            out_c = rad_channel * (2**(i+1))
+            self.encoder_blocks.append(ConvBlock(in_c, out_c))
+            self.pools.append(nn.MaxPool2d(kernel_size=2, stride=2))
+        # Bottleneck: both modalities are brought to ``out_c`` channels (width of the last encoder stage).
+        self.mid_conv_1 = single_conv(out_c, out_c)
+        self.mid_conv_2 = single_conv(sat_channel, out_c)
+        self.mid_merge = ConvBlock(2*out_c, out_c)
+        # Decoder: mirrors the encoder from the deepest stage upwards.
+        self.up_convs = nn.ModuleList()
+        self.decoder_blocks = nn.ModuleList()
+        for i in reversed(range(self.n_pool)):
+            up_in = rad_channel * (2**(i+2))
+            up_out = rad_channel * (2**(i+1))
+            self.up_convs.append(UpConv(up_in, up_out))
+            self.decoder_blocks.append(ConvBlock(up_in, up_out))
+        self.final_decoder = ConvBlock(4*rad_channel, 2*rad_channel)
+        self.out_conv_R = nn.Conv2d(2*rad_channel, rad_channel, kernel_size=1, padding='same')
+        self.out_conv_S = nn.Conv2d(out_c, sat_channel, kernel_size=1, padding='same')
+    def forward(self, radar, satellite):
+        """
+        :param radar: radar input fed to the encoder
+        :param satellite: satellite input, concatenated with the pooled radar features at the bottleneck
+        :return: tuple ``(pred_rad, pred_sat)``
+        """
+        # Encoding: keep every pre-pool activation for the skip connections.
+        enc_feats = []
+        x = radar
+        for block, pool in zip(self.encoder_blocks, self.pools):
+            x = block(x)
+            enc_feats.append(x)
+            x = pool(x)
+        # Bottleneck
+        x = F.relu(self.mid_conv_1(x))
+        y = F.relu(self.mid_conv_2(satellite))
+        x = torch.cat((x, y), dim=1)
+        mid_out = self.mid_merge(x)
+        pred_sat = self.out_conv_S(mid_out)
+        # Decoding starts from the concatenated tensor ``x``, not from ``mid_out``;
+        # ``mid_out`` only feeds the satellite head.
+        for up, decode, skip in zip(self.up_convs, self.decoder_blocks, reversed(enc_feats)):
+            x = up(x)
+            x = torch.cat((skip, x), dim=1)
+            x = decode(x)
+        # The first encoder activation is concatenated a second time here; kept as is
+        # because ``final_decoder`` is sized for it.
+        x = torch.cat((enc_feats[0], x), dim=1)
+        x = self.final_decoder(x)
+        pred_rad = self.out_conv_R(x)
+        return pred_rad, pred_sat
+# class Unet(nn.Module):
+#     def __init__(self,num_channel=1,rad_size=640,sat_size=20):
+#         super(Unet, self).__init__()
+#         self.MaxPool = nn.MaxPool2d(kernel_size=2, stride=2)
+#         self.Conv1 = ConvBlock(1, 2*num_channel)
+#         self.Conv2 = ConvBlock(2*num_channel, 4*num_channel)
+#         self.Conv3 = ConvBlock(4*num_channel, 8*num_channel)
+#         self.Conv4 = ConvBlock(8*num_channel, 16*num_channel)
+#         self.Conv5 = ConvBlock(16*num_channel, 32*num_channel)
+#         self.mid_conv_1 = single_conv(32*num_channel,32*num_channel)
+#         self.mid_conv_2 = single_conv(2, 32*num_channel)
+#         self.MidConv = ConvBlock(64*num_channel, 32*num_channel)
+#         self.out_conv_S = Conv2d(32*num_channel, 1, (1, 1), padding= 'same')
+#         self.Up5 = UpConv(64*num_channel, 32*num_channel)
+#         self.UpConv5 = ConvBlock(64*num_channel, 32*num_channel)
+#         self.Up4 = UpConv(32*num_channel, 16*num_channel)
+#         self.UpConv4 = ConvBlock(32*num_channel, 16*num_channel)
+#         self.Up3 = UpConv(16*num_channel, 8*num_channel)
+#         self.UpConv3 = ConvBlock(16*num_channel, 8*num_channel)
+#         self.Up2 = UpConv(8*num_channel, 4*num_channel)
+#         self.UpConv2 = ConvBlock(8*num_channel, 4*num_channel)
+#         self.Up1 = UpConv(4*num_channel, 2*num_channel)
+#         self.UpConv1 = ConvBlock(4*num_channel, 2*num_channel)
+#         self.out_conv_R = Conv2d(2*num_channel, 1, (1, 1), padding= 'same')
+#     def forward(self, radar,satellite):
+#         e1 = self.Conv1(radar)
+#         e2 = self.MaxPool(e1)
+#         e2 = self.Conv2(e2)
+#         e3 = self.MaxPool(e2)
+#         e3 = self.Conv3(e3)
+#         e4 = self.MaxPool(e3)
+#         e4 = self.Conv4(e4)
+#         e5 = self.MaxPool(e4)
+#         e5 = self.Conv5(e5)
+#         e6 = self.MaxPool(e5)
+#         X = F.relu(self.mid_conv_1(e6))
+#         Y = F.relu(self.mid_conv_2(satellite))
+#         X = torch.cat((X,Y),1)
+#         Y = self.MidConv(X)
+#         pred_satellite = self.out_conv_S(Y)
+#         d5 = self.Up5(X)
+#         d5 = torch.cat((e5, d5), dim=1)
+#         d5 = self.UpConv5(d5)
+#         d4 = self.Up4(d5)
+#         d4 = torch.cat((e4, d4), dim=1)
+#         d4 = self.UpConv4(d4)
+#         d3 = self.Up3(d4)
+#         d3 = torch.cat((e3, d3), dim=1)
+#         d3 = self.UpConv3(d3)
+#         d2 = self.Up2(d3)
+#         d2 = torch.cat((e2, d2), dim=1)
+#         d2 = self.UpConv2(d2)
+#         d1 = self.Up1(d2)
+#         d0 = torch.cat((e1, d1), dim=1)
+#         d0 = self.UpConv1(d0)
+#         pred_radar = self.out_conv_R(d0)
+#         return pred_radar, pred_satellite
+class R2Unet(nn.Module):
+    """Recurrent-residual U-Net with five encoder stages and a radar/satellite fusion bottleneck.
+    The first encoder block is built for 5 input channels and the satellite branch for 2.
+    """
+    def __init__(self,num_channel=1,t=2):
+        """
+        :param num_channel: base width multiplier; stage widths are 2x..32x this value
+        :param t: recurrence count used by every RRCNN block
+        """
+        super(R2Unet, self).__init__()
+        self.MaxPool = nn.MaxPool2d(kernel_size=2, stride=2)
+        self.RRCNN1 = RRCNN_block(5,2*num_channel,t=t)
+        self.RRCNN2 = RRCNN_block(2*num_channel,4*num_channel,t=t)
+        self.RRCNN3 = RRCNN_block(4*num_channel,8*num_channel,t=t)
+        self.RRCNN4 = RRCNN_block(8*num_channel,16*num_channel,t=t)
+        self.RRCNN5 = RRCNN_block(16*num_channel,32*num_channel,t=t)
+        self.mid_conv_1 = single_conv(32*num_channel,32*num_channel)
+        self.mid_conv_2 = single_conv(2, 32*num_channel)
+        # ``t`` is now forwarded to the bottleneck and decoder blocks as well; they used to
+        # fall back to the RRCNN_block default, so a non-default ``t`` only reached the encoder.
+        self.MidConv = RRCNN_block(64*num_channel, 32*num_channel,t=t)
+        # ``nn.Conv2d`` matches the spelling used by Unet and needs no separate import.
+        self.out_conv_S = nn.Conv2d(32*num_channel, 1, (1, 1), padding= 'same')
+        self.Up5 = UpConv(64*num_channel, 32*num_channel)
+        self.UpRRCNN5 = RRCNN_block(64*num_channel, 32*num_channel,t=t)
+        self.Up4 = UpConv(32*num_channel, 16*num_channel)
+        self.UpRRCNN4 = RRCNN_block(32*num_channel, 16*num_channel,t=t)
+        self.Up3 = UpConv(16*num_channel, 8*num_channel)
+        self.UpRRCNN3 = RRCNN_block(16*num_channel, 8*num_channel,t=t)
+        self.Up2 = UpConv(8*num_channel, 4*num_channel)
+        self.UpRRCNN2 = RRCNN_block(8*num_channel, 4*num_channel,t=t)
+        self.Up1 = UpConv(4*num_channel, 2*num_channel)
+        self.UpRRCNN1 = RRCNN_block(4*num_channel, 2*num_channel,t=t)
+        self.out_conv_R = nn.Conv2d(2*num_channel, 1, (1, 1), padding= 'same')
+    def forward(self, radar,satellite):
+        """
+        :param radar: radar input fed to the encoder
+        :param satellite: satellite input, fused with the radar features after five 2x poolings
+        :return: tuple ``(pred_radar, pred_satellite)``
+        """
+        # Encoder: e1..e5 are kept for the skip connections.
+        e1 = self.RRCNN1(radar)
+        e2 = self.MaxPool(e1)
+        e2 = self.RRCNN2(e2)
+        e3 = self.MaxPool(e2)
+        e3 = self.RRCNN3(e3)
+        e4 = self.MaxPool(e3)
+        e4 = self.RRCNN4(e4)
+        e5 = self.MaxPool(e4)
+        e5 = self.RRCNN5(e5)
+        e6 = self.MaxPool(e5)
+        # Bottleneck: concatenate radar and satellite features along the channel axis.
+        X = F.relu(self.mid_conv_1(e6))
+        Y = F.relu(self.mid_conv_2(satellite))
+        X = torch.cat((X,Y),1)
+        Y = self.MidConv(X)
+        pred_satellite = self.out_conv_S(Y)
+        # Decoder starts from the concatenated tensor X, not from the MidConv output.
+        d5 = self.Up5(X)
+        d5 = torch.cat((e5, d5), dim=1)
+        d5 = self.UpRRCNN5(d5)
+        d4 = self.Up4(d5)
+        d4 = torch.cat((e4, d4), dim=1)
+        d4 = self.UpRRCNN4(d4)
+        d3 = self.Up3(d4)
+        d3 = torch.cat((e3, d3), dim=1)
+        d3 = self.UpRRCNN3(d3)
+        d2 = self.Up2(d3)
+        d2 = torch.cat((e2, d2), dim=1)
+        d2 = self.UpRRCNN2(d2)
+        d1 = self.Up1(d2)
+        d0 = torch.cat((e1, d1), dim=1)
+        d0 = self.UpRRCNN1(d0)
+        pred_radar = self.out_conv_R(d0)
+        return pred_radar, pred_satellite
+class AttUnet(nn.Module):
+    """Attention U-Net with five encoder stages and a radar/satellite fusion bottleneck.
+    Every skip connection is gated by an AttentionBlock before concatenation.
+    The first encoder block is built for 5 input channels and the satellite branch for 2.
+    """
+    def __init__(self,num_channel=1):
+        """
+        :param num_channel: base width multiplier; stage widths are 2x..32x this value
+        """
+        super(AttUnet, self).__init__()
+        self.MaxPool = nn.MaxPool2d(kernel_size=2, stride=2)
+        self.Conv1 = ConvBlock(5, 2*num_channel)
+        self.Conv2 = ConvBlock(2*num_channel, 4*num_channel)
+        self.Conv3 = ConvBlock(4*num_channel, 8*num_channel)
+        self.Conv4 = ConvBlock(8*num_channel, 16*num_channel)
+        self.Conv5 = ConvBlock(16*num_channel, 32*num_channel)
+        self.mid_conv_1 = single_conv(32*num_channel,32*num_channel)
+        self.mid_conv_2 = single_conv(2, 32*num_channel)
+        self.MidConv = ConvBlock(64*num_channel, 32*num_channel)
+        # ``nn.Conv2d`` matches the spelling used by Unet and needs no separate import.
+        self.out_conv_S = nn.Conv2d(32*num_channel, 1, (1, 1), padding= 'same')
+        self.Up5 = UpConv(64*num_channel, 32*num_channel)
+        self.Att5 = AttentionBlock(F_g=32*num_channel, F_l=32*num_channel, n_coefficients=16*num_channel)
+        self.UpConv5 = ConvBlock(64*num_channel, 32*num_channel)
+        self.Up4 = UpConv(32*num_channel, 16*num_channel)
+        self.Att4 = AttentionBlock(F_g=16*num_channel, F_l=16*num_channel, n_coefficients=8*num_channel)
+        self.UpConv4 = ConvBlock(32*num_channel, 16*num_channel)
+        self.Up3 = UpConv(16*num_channel, 8*num_channel)
+        self.Att3 = AttentionBlock(F_g=8*num_channel, F_l=8*num_channel, n_coefficients=4*num_channel)
+        self.UpConv3 = ConvBlock(16*num_channel, 8*num_channel)
+        self.Up2 = UpConv(8*num_channel, 4*num_channel)
+        self.Att2 = AttentionBlock(F_g=4*num_channel, F_l=4*num_channel, n_coefficients=2*num_channel)
+        self.UpConv2 = ConvBlock(8*num_channel, 4*num_channel)
+        self.Up1 = UpConv(4*num_channel, 2*num_channel)
+        self.Att1 = AttentionBlock(F_g=2*num_channel, F_l=2*num_channel, n_coefficients=1*num_channel)
+        self.UpConv1 = ConvBlock(4*num_channel, 2*num_channel)
+        self.out_conv_R = nn.Conv2d(2*num_channel, 1, (1, 1), padding= 'same')
+    def forward(self, radar,satellite):
+        """
+        :param radar: radar input fed to the encoder
+        :param satellite: satellite input, fused with the radar features after five 2x poolings
+        :return: tuple ``(pred_radar, pred_satellite)``
+        """
+        # Encoder: e1..e5 are kept for the skip connections.
+        e1 = self.Conv1(radar)
+        e2 = self.MaxPool(e1)
+        e2 = self.Conv2(e2)
+        e3 = self.MaxPool(e2)
+        e3 = self.Conv3(e3)
+        e4 = self.MaxPool(e3)
+        e4 = self.Conv4(e4)
+        e5 = self.MaxPool(e4)
+        e5 = self.Conv5(e5)
+        e6 = self.MaxPool(e5)
+        # Bottleneck: concatenate radar and satellite features along the channel axis.
+        X = F.relu(self.mid_conv_1(e6))
+        Y = F.relu(self.mid_conv_2(satellite))
+        X = torch.cat((X,Y),1)
+        Y = self.MidConv(X)
+        pred_satellite = self.out_conv_S(Y)
+        # Decoder starts from the concatenated tensor X, not from the MidConv output.
+        d5 = self.Up5(X)
+        s4 = self.Att5(gate=d5, skip_connection=e5)
+        d5 = torch.cat((s4, d5), dim=1) # concatenate attention-weighted skip connection with previous layer output
+        d5 = self.UpConv5(d5)
+        d4 = self.Up4(d5)
+        s3 = self.Att4(gate=d4, skip_connection=e4)
+        d4 = torch.cat((s3, d4), dim=1)
+        d4 = self.UpConv4(d4)
+        d3 = self.Up3(d4)
+        s2 = self.Att3(gate=d3, skip_connection=e3)
+        d3 = torch.cat((s2, d3), dim=1)
+        d3 = self.UpConv3(d3)
+        d2 = self.Up2(d3)
+        s1 = self.Att2(gate=d2, skip_connection=e2)
+        d2 = torch.cat((s1, d2), dim=1)
+        d2 = self.UpConv2(d2)
+        d1 = self.Up1(d2)
+        s0 = self.Att1(gate=d1, skip_connection=e1)
+        d0 = torch.cat((s0, d1), dim=1)
+        d0 = self.UpConv1(d0)
+        pred_radar = self.out_conv_R(d0)
+        return pred_radar, pred_satellite
+class AttR2Unet(nn.Module):
+    """Attention U-Net built from recurrent-residual blocks, with a radar/satellite fusion bottleneck.
+    Every skip connection is gated by an AttentionBlock before concatenation.
+    The first encoder block is built for 5 input channels and the satellite branch for 2.
+    """
+    def __init__(self,num_channel=1,t=2):
+        """
+        :param num_channel: base width multiplier; stage widths are 2x..32x this value
+        :param t: recurrence count used by every RRCNN block
+        """
+        super(AttR2Unet, self).__init__()
+        self.MaxPool = nn.MaxPool2d(kernel_size=2, stride=2)
+        # ``t`` was accepted but never passed on, so every block silently used the
+        # RRCNN_block default; it is now forwarded everywhere (no change for t=2).
+        self.RRCNN1 = RRCNN_block(5, 2*num_channel, t=t)
+        self.RRCNN2 = RRCNN_block(2*num_channel, 4*num_channel, t=t)
+        self.RRCNN3 = RRCNN_block(4*num_channel, 8*num_channel, t=t)
+        self.RRCNN4 = RRCNN_block(8*num_channel, 16*num_channel, t=t)
+        self.RRCNN5 = RRCNN_block(16*num_channel, 32*num_channel, t=t)
+        self.mid_conv_1 = single_conv(32*num_channel,32*num_channel)
+        self.mid_conv_2 = single_conv(2, 32*num_channel)
+        self.MidConv = RRCNN_block(64*num_channel, 32*num_channel, t=t)
+        # ``nn.Conv2d`` matches the spelling used by Unet and needs no separate import.
+        self.out_conv_S = nn.Conv2d(32*num_channel, 1, (1, 1), padding= 'same')
+        self.Up5 = UpConv(64*num_channel, 32*num_channel)
+        self.Att5 = AttentionBlock(F_g=32*num_channel, F_l=32*num_channel, n_coefficients=16*num_channel)
+        self.UpRRCNN5 = RRCNN_block(64*num_channel, 32*num_channel, t=t)
+        self.Up4 = UpConv(32*num_channel, 16*num_channel)
+        self.Att4 = AttentionBlock(F_g=16*num_channel, F_l=16*num_channel, n_coefficients=8*num_channel)
+        self.UpRRCNN4 = RRCNN_block(32*num_channel, 16*num_channel, t=t)
+        self.Up3 = UpConv(16*num_channel, 8*num_channel)
+        self.Att3 = AttentionBlock(F_g=8*num_channel, F_l=8*num_channel, n_coefficients=4*num_channel)
+        self.UpRRCNN3 = RRCNN_block(16*num_channel, 8*num_channel, t=t)
+        self.Up2 = UpConv(8*num_channel, 4*num_channel)
+        self.Att2 = AttentionBlock(F_g=4*num_channel, F_l=4*num_channel, n_coefficients=2*num_channel)
+        self.UpRRCNN2 = RRCNN_block(8*num_channel, 4*num_channel, t=t)
+        self.Up1 = UpConv(4*num_channel, 2*num_channel)
+        self.Att1 = AttentionBlock(F_g=2*num_channel, F_l=2*num_channel, n_coefficients=1*num_channel)
+        self.UpRRCNN1 = RRCNN_block(4*num_channel, 2*num_channel, t=t)
+        self.out_conv_R = nn.Conv2d(2*num_channel, 1, (1, 1), padding= 'same')
+    def forward(self, radar,satellite):
+        """
+        :param radar: radar input fed to the encoder
+        :param satellite: satellite input, fused with the radar features after five 2x poolings
+        :return: tuple ``(pred_radar, pred_satellite)``
+        """
+        # Encoder: e1..e5 are kept for the skip connections.
+        e1 = self.RRCNN1(radar)
+        e2 = self.MaxPool(e1)
+        e2 = self.RRCNN2(e2)
+        e3 = self.MaxPool(e2)
+        e3 = self.RRCNN3(e3)
+        e4 = self.MaxPool(e3)
+        e4 = self.RRCNN4(e4)
+        e5 = self.MaxPool(e4)
+        e5 = self.RRCNN5(e5)
+        e6 = self.MaxPool(e5)
+        # Bottleneck: concatenate radar and satellite features along the channel axis.
+        X = F.relu(self.mid_conv_1(e6))
+        Y = F.relu(self.mid_conv_2(satellite))
+        X = torch.cat((X,Y),1)
+        Y = self.MidConv(X)
+        pred_satellite = self.out_conv_S(Y)
+        # Decoder starts from the concatenated tensor X, not from the MidConv output.
+        d5 = self.Up5(X)
+        s4 = self.Att5(gate=d5, skip_connection=e5)
+        d5 = torch.cat((s4, d5), dim=1) # concatenate attention-weighted skip connection with previous layer output
+        d5 = self.UpRRCNN5(d5)
+        d4 = self.Up4(d5)
+        s3 = self.Att4(gate=d4, skip_connection=e4)
+        d4 = torch.cat((s3, d4), dim=1)
+        d4 = self.UpRRCNN4(d4)
+        d3 = self.Up3(d4)
+        s2 = self.Att3(gate=d3, skip_connection=e3)
+        d3 = torch.cat((s2, d3), dim=1)
+        d3 = self.UpRRCNN3(d3)
+        d2 = self.Up2(d3)
+        s1 = self.Att2(gate=d2, skip_connection=e2)
+        d2 = torch.cat((s1, d2), dim=1)
+        d2 = self.UpRRCNN2(d2)
+        d1 = self.Up1(d2)
+        s0 = self.Att1(gate=d1, skip_connection=e1)
+        d0 = torch.cat((s0, d1), dim=1)
+        d0 = self.UpRRCNN1(d0)
+        pred_radar = self.out_conv_R(d0)
+        return pred_radar, pred_satellite
+class Network(nn.Module):
+    """Wrapper that instantiates the backbone named by ``model_type`` and delegates ``forward`` to it."""
+    def __init__(self,model_type:str,rad_channel:int, sat_channel:int,rad_size:int,sat_size:int):
+        """
+        :param model_type: one of "Nothing", "Unet", "R2Unet", "AttUnet", "AttR2Unet"
+        :param rad_channel: radar channel count (passed to Unet; used as ``num_channel`` for the other backbones)
+        :param sat_channel: satellite channel count (Unet only)
+        :param rad_size: radar size (Unet only)
+        :param sat_size: satellite size (Unet only)
+        :raises ValueError: if ``model_type`` is not recognised
+        """
+        super(Network,self).__init__()
+        print(model_type)
+        if(model_type == "Nothing"):
+            self.net = Nothing()
+        elif(model_type == "Unet"):
+            self.net = Unet(rad_channel=rad_channel,sat_channel=sat_channel,rad_size=rad_size,sat_size=sat_size)
+        # R2Unet, AttUnet and AttR2Unet only take ``num_channel`` (plus ``t`` for the recurrent
+        # variants); passing the Unet keywords raised TypeError at construction time.
+        # NOTE(review): ``rad_channel`` is mapped onto ``num_channel`` on the assumption that it is
+        # the width multiplier, as in Unet. These backbones hard-code their input channel counts
+        # and pooling depth, so sat_channel/rad_size/sat_size are not forwarded - confirm.
+        elif(model_type == "R2Unet"):
+            self.net = R2Unet(num_channel=rad_channel)
+        elif(model_type == "AttUnet"):
+            self.net = AttUnet(num_channel=rad_channel)
+        elif(model_type == "AttR2Unet"):
+            self.net = AttR2Unet(num_channel=rad_channel)
+        else:
+            raise ValueError("model_type is wrong")
+    def forward(self, radar,satellite):
+        """Run the wrapped backbone and return its ``(pred_radar, pred_satellite)`` pair."""
+        # Call the module itself rather than ``.forward`` so registered hooks are executed.
+        pred_radar, pred_satellite = self.net(radar,satellite)
+        return pred_radar, pred_satellite

src/datamodule.py ADDED Viewed

	@@ -0,0 +1,341 @@

+import os
+from torch.utils.data import DataLoader, Dataset, random_split
+import numpy as np
+from datetime import datetime, timedelta
+from torchvision import transforms
+from pytorch_lightning import LightningDataModule, LightningModule
+from  pytorch_lightning.cli import LightningCLI
+from torch.utils.data import DataLoader
+import pytorch_lightning as L
+import torch
+import torch.nn as nn
+from typing import Tuple, Dict, List
+# import optim
+class DataReader(Dataset):
+    """Dataset yielding (radar input, satellite input, radar target, satellite target) tensors
+    read from ``.npz`` files under ``<dir_data>/<type_data>``.
+    """
+    def __init__(
+            self, dir_data : str,
+            type_data : str,
+            rad_attribute : str ,
+            sat_attribute : str,
+            hours_predicted : int,
+            rad_predicted : str ,
+            sat_predicted : str ,
+            time_points_rad : int,
+            time_points_sat : int,
+            rad_size:int,
+            sat_size:int,
+            ablation : str,
+        ):
+        """
+        :param dir_data: root directory holding the split sub-directories and the ``*_mean.npz`` / ``*_std.npz`` files
+        :param type_data: split to read: "train", "test" or "val"
+        :param rad_attribute: archive key of the radar input (also used for the radar statistics)
+        :param sat_attribute: archive key of the satellite input (also used for the satellite statistics)
+        :param hours_predicted: forecast offset in hours used when pairing inputs with targets
+        :param rad_predicted: archive key of the radar target
+        :param sat_predicted: archive key of the satellite target
+        :param time_points_rad: stored on the instance; not used by this class
+        :param time_points_sat: stored on the instance; not used by this class
+        :param rad_size: stored on the instance; not used by this class
+        :param sat_size: stored on the instance; not used by this class
+        :param ablation: sample pairing scheme: "no", "rad", "sat", "full" or "time".
+            This was previously written ``ablation = str``, which made the ``str`` type the
+            default value; omitting it could only end in the ValueError below.
+        :raises ValueError: on an unknown ``type_data`` or ``ablation``
+        """
+        super().__init__()
+        self.base_dir=dir_data
+        self.type_data = type_data
+        if self.type_data not in ("train", "test", "val"):
+            raise ValueError("Type must be train, test or val")
+        self.dir_data=os.path.join(dir_data, self.type_data)
+        self.sat_size = sat_size
+        self.rad_size = rad_size
+        self.hours_predicted = hours_predicted
+        self.rad_attribute = rad_attribute
+        self.sat_attribute = sat_attribute
+        self.rad_predicted = rad_predicted
+        self.sat_predicted = sat_predicted
+        self.time_points_rad = time_points_rad
+        self.time_points_sat = time_points_sat
+        self.transform_rad = None
+        self.transform_sat = None
+        self.ablation = ablation
+        # Normalisation statistics live next to the split directories.
+        self.rad_mean = self._read_npz(os.path.join(self.base_dir,'rad_mean.npz'), self.rad_attribute)
+        self.rad_std =  self._read_npz(os.path.join(self.base_dir,'rad_std.npz'), self.rad_attribute)
+        self.sat_mean = self._read_npz(os.path.join(self.base_dir,'sat_mean.npz'), self.sat_attribute)
+        self.sat_std =  self._read_npz(os.path.join(self.base_dir,'sat_std.npz'), self.sat_attribute)
+        #Create transform
+        self.create_transform()
+        #Get list img
+        builders = {
+            "no": self.gen_list_img_no,
+            "rad": self.gen_list_img_rad,
+            "sat": self.gen_list_img_sat,
+            "full": self.gen_list_img_full,
+            "time": self.gen_list_img_time,
+        }
+        builder = builders.get(self.ablation)
+        if builder is None:
+            # "time" is a supported mode and is now listed in the message.
+            raise ValueError("Ablation must be no,rad,sat,full,time")
+        self.list_img_dir = builder(self.dir_data)
+        print(f"Number of {self.type_data } samples:",len(self.list_img_dir))
+    @staticmethod
+    def _read_npz(path, key):
+        """Return array ``key`` from the ``.npz`` archive at ``path``, closing the archive afterwards."""
+        with np.load(path) as archive:
+            return archive[key]
+    @staticmethod
+    def _is_sample_name(name):
+        """Return True for file names ending in ``00.npz`` or ``03.npz``."""
+        return name.endswith(("00.npz", "03.npz"))
+    @staticmethod
+    def _sat_name(name):
+        """Drop the two characters preceding the extension (``<stamp>MM.npz`` -> ``<stamp>.npz``)."""
+        return name[0:-6] + name[-4:]
+    @staticmethod
+    def _all_files(paths):
+        """Return True when every path in ``paths`` is an existing regular file."""
+        return all(os.path.isfile(p) for p in paths)
+    def __len__(self):
+        return len(self.list_img_dir)
+    def __getitem__(self, idx):
+        """Load, convert to tensor and normalise the four arrays of sample ``idx``."""
+        # NOTE(review): entries produced by gen_list_img_time hold a *list* of paths in each
+        # slot, which np.load does not accept - "time" samples look unsupported here; confirm.
+        rad_in_path, sat_in_path, rad_gt_path, sat_gt_path = self.list_img_dir[idx]
+        inp_rad = self.transform_rad(self._read_npz(rad_in_path, self.rad_attribute))
+        out_rad = self.transform_rad(self._read_npz(rad_gt_path, self.rad_predicted))
+        inp_sat = self.transform_sat(self._read_npz(sat_in_path, self.sat_attribute))
+        # Only the first entry of the satellite target array is used.
+        out_sat = self.transform_sat(self._read_npz(sat_gt_path, self.sat_predicted)[0])
+        return inp_rad,inp_sat.float(),out_rad, out_sat.float()
+    def create_transform(self):
+        """Build the ToTensor + Normalize pipelines for radar and satellite arrays."""
+        self.transform_rad = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize(self.rad_mean,self.rad_std)
+        ])
+        # Satellite statistics are indexed at [0] before normalisation.
+        self.transform_sat = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize(self.sat_mean[0],self.sat_std[0]),
+        ])
+    def gen_list_img_no(self,path):
+        """Pair ``pred_rad``/``pred_sat`` inputs with same-timestamp ``rad``/``sat`` targets.
+        Returns a list of ``[rad_in, sat_in, rad_target, sat_target]`` paths; samples with a
+        missing file are dropped.
+        """
+        pred_rad_dir =os.path.join(path,"pred_rad")
+        pred_sat_dir = os.path.join(path,"pred_sat")
+        GT_rad_dir = os.path.join(path ,"rad")
+        GT_sat_dir = os.path.join(path,"sat")
+        list_dir = []
+        for name in os.listdir(pred_rad_dir):
+            if not self._is_sample_name(name):
+                continue
+            sat_name = self._sat_name(name)
+            sample = [
+                os.path.join(pred_rad_dir,name),
+                os.path.join(pred_sat_dir,sat_name),
+                os.path.join(GT_rad_dir, name),
+                os.path.join(GT_sat_dir, sat_name),
+            ]
+            if self._all_files(sample[1:]):
+                list_dir.append(sample)
+        return list_dir
+    def gen_list_img_rad(self,path):
+        """Use an observed ``rad`` frame as radar input; the satellite input and both targets
+        are taken ``hours_predicted`` hours later.
+        """
+        pred_rad_dir = os.path.join(path,"rad")
+        pred_sat_dir = os.path.join(path,"pred_sat")
+        GT_rad_dir = os.path.join(path ,"rad")
+        GT_sat_dir = os.path.join(path,"sat")
+        list_dir = []
+        for name in os.listdir(pred_rad_dir):
+            if not self._is_sample_name(name):
+                continue
+            target_date = self.get_date_time(name) + timedelta(hours=self.hours_predicted)
+            hour_stamp = target_date.strftime('%Y%m%d%H') + '.npz'
+            minute_stamp = target_date.strftime('%Y%m%d%H%M') + '.npz'
+            sample = [
+                os.path.join(pred_rad_dir,name),
+                os.path.join(pred_sat_dir, hour_stamp),
+                os.path.join(GT_rad_dir, minute_stamp),
+                os.path.join(GT_sat_dir, hour_stamp),
+            ]
+            if self._all_files(sample[1:]):
+                list_dir.append(sample)
+        return list_dir
+    def gen_list_img_sat(self,path):
+        """Use ``pred_rad`` as radar input and an observed ``sat`` frame from
+        ``hours_predicted`` hours earlier as satellite input; targets share the input timestamp.
+        """
+        pred_rad_dir = os.path.join(path,"pred_rad")
+        pred_sat_dir = os.path.join(path,"sat")
+        GT_rad_dir = os.path.join(path ,"rad")
+        GT_sat_dir = os.path.join(path,"sat")
+        list_dir = []
+        for name in os.listdir(pred_rad_dir):
+            if not self._is_sample_name(name):
+                continue
+            earlier = self.get_date_time(name) - timedelta(hours=self.hours_predicted)
+            sample = [
+                os.path.join(pred_rad_dir,name),
+                os.path.join(pred_sat_dir, earlier.strftime('%Y%m%d%H') + '.npz'),
+                os.path.join(GT_rad_dir, name),
+                os.path.join(GT_sat_dir, self._sat_name(name)),
+            ]
+            if self._all_files(sample[1:]):
+                list_dir.append(sample)
+        return list_dir
+    def gen_list_img_full(self,path):
+        """Use observed ``rad``/``sat`` frames as inputs and the frames ``hours_predicted``
+        hours later as targets.
+        """
+        pred_rad_dir = os.path.join(path,"rad")
+        pred_sat_dir = os.path.join(path,"sat")
+        GT_rad_dir = os.path.join(path ,"rad")
+        GT_sat_dir = os.path.join(path,"sat")
+        list_dir = []
+        for name in os.listdir(pred_rad_dir):
+            if not self._is_sample_name(name):
+                continue
+            temp_date = self.get_date_time(name)
+            target_date = temp_date + timedelta(hours=self.hours_predicted)
+            sample = [
+                os.path.join(pred_rad_dir,name),
+                os.path.join(pred_sat_dir,temp_date.strftime('%Y%m%d%H')+'.npz'),
+                os.path.join(GT_rad_dir, target_date.strftime('%Y%m%d%H%M') + '.npz'),
+                os.path.join(GT_sat_dir, target_date.strftime('%Y%m%d%H') + '.npz'),
+            ]
+            if self._all_files(sample[1:]):
+                list_dir.append(sample)
+        return list_dir
+    def gen_list_img_time(self,path):
+        """Build samples that add earlier observed frames to the predicted inputs.
+        Each entry is ``[[4 rad history paths + pred_rad], [sat history path, pred_sat],
+        [rad target], [sat target]]``; samples with any missing file are dropped.
+        """
+        pred_rad_dir =os.path.join(path,"pred_rad")
+        pred_sat_dir = os.path.join(path,"pred_sat")
+        GT_rad_dir = os.path.join(path ,"rad")
+        GT_sat_dir = os.path.join(path,"sat")
+        list_dir = []
+        for name in os.listdir(pred_rad_dir):
+            # Filter before parsing: the timestamp used to be parsed first, so any stray
+            # file in the directory raised ValueError instead of being skipped.
+            if not self._is_sample_name(name):
+                continue
+            temp_date = self.get_date_time(name)
+            # Four observed radar frames at -210, -200, -190 and -180 minutes.
+            rad_history = [
+                os.path.join(GT_rad_dir, (temp_date+timedelta(minutes=-210+i*10)).strftime('%Y%m%d%H%M') + '.npz')
+                for i in range(4)
+            ]
+            # One observed satellite frame at -180 minutes.
+            sat_history = [
+                os.path.join(GT_sat_dir, (temp_date+timedelta(minutes=-180)).strftime('%Y%m%d%H') + '.npz')
+            ]
+            pred_sat_path = os.path.join(pred_sat_dir,self._sat_name(name))
+            GT_rad_path = os.path.join(GT_rad_dir, name)
+            GT_sat_path = os.path.join(GT_sat_dir, self._sat_name(name))
+            required = rad_history + sat_history + [pred_sat_path, GT_rad_path, GT_sat_path]
+            if self._all_files(required):
+                list_dir.append([
+                    rad_history + [os.path.join(pred_rad_dir,name)],
+                    sat_history + [pred_sat_path],
+                    [GT_rad_path],
+                    [GT_sat_path],
+                ])
+        return list_dir
+    def get_date_time(self,name):
+        """Parse the leading ``YYYYMMDDHHMM`` characters of ``name`` into a ``datetime``."""
+        year=int(name[0:4])
+        month=int(name[4:6])
+        day=int(name[6:8])
+        hour=int(name[8:10])
+        minute = int(name[10:12])
+        return datetime(year,month,day,hour,minute)
+class WeatherForecastDataModule(LightningDataModule):
+    """LightningDataModule exposing the train/val/test ``DataReader`` splits of one data directory."""
+    def __init__(
+            self,
+            dir_data: str,
+            batch_size:int ,
+            hours_predicted :int,
+            num_workers:int ,
+            pin_memory: bool ,
+            time_points_rad : int,
+            time_points_sat : int,
+            sat_inp_vars: str,
+            sat_out_vars : str,
+            sat_size: int,
+            rad_inp_vars : str,
+            rad_out_vars : str,
+            rad_size: int,
+            ablation: str,
+        ):
+        super().__init__()
+        # Makes every constructor argument available through ``self.hparams``.
+        self.save_hyperparameters(logger=True)
+        self.data_train = None
+        self.data_test = None
+        self.data_val = None
+        # Normalisation statistics are read from the data root, keyed by the input variable names.
+        stats_dir = self.hparams.dir_data
+        rad_key = self.hparams.rad_inp_vars
+        sat_key = self.hparams.sat_inp_vars
+        self.rad_mean = np.load(os.path.join(stats_dir,'rad_mean.npz'))[rad_key]
+        self.rad_std =  np.load(os.path.join(stats_dir,'rad_std.npz'))[rad_key]
+        self.sat_mean = np.load(os.path.join(stats_dir,'sat_mean.npz'))[sat_key]
+        self.sat_std =  np.load(os.path.join(stats_dir,'sat_std.npz'))[sat_key]
+    def prepare_data(self):
+        pass
+    def setup(self, stage):
+        """Create the train, test and val datasets (in that order); ``stage`` is not consulted."""
+        hp = self.hparams
+        # Arguments shared by all three splits; only ``type_data`` differs.
+        shared = dict(
+            dir_data=hp.dir_data,
+            rad_attribute = hp.rad_inp_vars,
+            sat_attribute = hp.sat_inp_vars,
+            hours_predicted = hp.hours_predicted,
+            rad_predicted = hp.rad_out_vars,
+            sat_predicted = hp.sat_out_vars,
+            time_points_rad = hp.time_points_rad,
+            time_points_sat = hp.time_points_sat,
+            sat_size = hp.sat_size,
+            rad_size = hp.rad_size,
+            ablation = hp.ablation,
+        )
+        self.data_train = DataReader(type_data="train", **shared)
+        self.data_test = DataReader(type_data="test", **shared)
+        self.data_val = DataReader(type_data="val", **shared)
+    def _loader(self, dataset, shuffle):
+        """Wrap ``dataset`` in a DataLoader configured from the stored hyper-parameters."""
+        return DataLoader(
+            dataset,
+            batch_size=self.hparams.batch_size,
+            num_workers=self.hparams.num_workers,
+            drop_last=False,
+            pin_memory=self.hparams.pin_memory,
+            shuffle=shuffle,
+        )
+    def train_dataloader(self):
+        """Shuffled loader over the training split."""
+        return self._loader(self.data_train, True)
+    def val_dataloader(self):
+        """Sequential loader over the validation split."""
+        return self._loader(self.data_val, False)
+    def test_dataloader(self):
+        """Sequential loader over the test split."""
+        return self._loader(self.data_test, False)

src/lr_scheduler.py ADDED Viewed

	@@ -0,0 +1,94 @@

+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+import math
+import warnings
+from typing import List
+from torch.optim import Optimizer
+from torch.optim.lr_scheduler import _LRScheduler
+class LinearWarmupCosineAnnealingLR(_LRScheduler):
+    """Sets the learning rate of each parameter group to follow a linear warmup schedule between
+    warmup_start_lr and base_lr followed by a cosine annealing schedule between base_lr and
+    eta_min."""
+    def __init__(
+        self,
+        optimizer: Optimizer,
+        warmup_epochs: int,
+        max_epochs: int,
+        warmup_start_lr: float = 0.0,
+        eta_min: float = 0.0,
+        last_epoch: int = -1,
+    ) -> None:
+        """
+        Args:
+            optimizer (Optimizer): Wrapped optimizer.
+            warmup_epochs (int): Maximum number of iterations for linear warmup
+            max_epochs (int): Maximum number of iterations
+            warmup_start_lr (float): Learning rate to start the linear warmup. Default: 0.
+            eta_min (float): Minimum learning rate. Default: 0.
+            last_epoch (int): The index of last epoch. Default: -1.
+        """
+        self.warmup_epochs = warmup_epochs
+        self.max_epochs = max_epochs
+        self.warmup_start_lr = warmup_start_lr
+        self.eta_min = eta_min
+        super().__init__(optimizer, last_epoch)
+    def get_lr(self) -> List[float]:
+        """Compute learning rate using chainable form of the scheduler."""
+        if not self._get_lr_called_within_step:
+            warnings.warn(
+                "To get the last learning rate computed by the scheduler, " "please use `get_last_lr()`.",
+                UserWarning,
+            )
+        if self.last_epoch == self.warmup_epochs:
+            return self.base_lrs
+        if self.last_epoch == 0:
+            return [self.warmup_start_lr] * len(self.base_lrs)
+        if self.last_epoch < self.warmup_epochs:
+            return [
+                group["lr"] + (base_lr - self.warmup_start_lr) / (self.warmup_epochs - 1)
+                for base_lr, group in zip(self.base_lrs, self.optimizer.param_groups)
+            ]
+        if (self.last_epoch - 1 - self.max_epochs) % (2 * (self.max_epochs - self.warmup_epochs)) == 0:
+            return [
+                group["lr"]
+                + (base_lr - self.eta_min) * (1 - math.cos(math.pi / (self.max_epochs - self.warmup_epochs))) / 2
+                for base_lr, group in zip(self.base_lrs, self.optimizer.param_groups)
+            ]
+        return [
+            (1 + math.cos(math.pi * (self.last_epoch - self.warmup_epochs) / (self.max_epochs - self.warmup_epochs)))
+            / (
+                1
+                + math.cos(
+                    math.pi * (self.last_epoch - self.warmup_epochs - 1) / (self.max_epochs - self.warmup_epochs)
+                )
+            )
+            * (group["lr"] - self.eta_min)
+            + self.eta_min
+            for group in self.optimizer.param_groups
+        ]
+    def _get_closed_form_lr(self) -> List[float]:
+        """Called when epoch is passed as a param to the `step` function of the scheduler."""
+        if self.last_epoch < self.warmup_epochs:
+            return [
+                self.warmup_start_lr
+                + self.last_epoch * (base_lr - self.warmup_start_lr) / max(1, self.warmup_epochs - 1)
+                for base_lr in self.base_lrs
+            ]
+        return [
+            self.eta_min
+            + 0.5
+            * (base_lr - self.eta_min)
+            * (1 + math.cos(math.pi * (self.last_epoch - self.warmup_epochs) / (self.max_epochs - self.warmup_epochs)))
+            for base_lr in self.base_lrs
+        ]

src/metric.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import numpy as np
+import torch
+def MSE(pred,GT,lat,clim):
+    return torch.mean((pred-GT)**2)
+def RMSE(pred,GT,lat,clim):
+    return torch.sqrt(torch.mean((pred-GT)**2))
+def MAE(pred,GT,lat,clim):
+    return torch.mean(torch.abs(pred-GT))
+def WMSE(pred, y, lat,clim):
+    if(lat is None):return 0
+    error = (pred - y) ** 2  # [N, C, H, W]
+    # lattitude weights
+    w_lat = np.cos(np.deg2rad(lat))
+    w_lat = w_lat / w_lat.mean()
+    w_lat = torch.from_numpy(w_lat).unsqueeze(0).unsqueeze(-1).to(dtype=error.dtype, device=error.device)  # (1, H, 1)
+    loss =  (error * w_lat).mean()
+    return loss
+def WRMSE(pred,GT,lat,clim):
+    if(lat is None):return 0
+    error = (pred - GT) ** 2  # [B, V, H, W]
+    # lattitude weights
+    w_lat = np.cos(np.deg2rad(lat))
+    w_lat = w_lat / w_lat.mean()  # (H, )
+    w_lat = torch.from_numpy(w_lat).unsqueeze(0).unsqueeze(-1).to(dtype=error.dtype, device=error.device)
+    loss = torch.mean(
+                torch.sqrt(torch.mean(error* w_lat, dim=(-2, -1)))
+            )
+    return loss
+def ACC(pred,GT,lat,clim):
+    if(lat is None):return 0
+    w_lat = np.cos(np.deg2rad(lat))
+    w_lat = w_lat / w_lat.mean()  # (H, )
+    w_lat = torch.from_numpy(w_lat).unsqueeze(0).unsqueeze(-1).to(dtype=pred.dtype, device=pred.device)  # [1, H, 1]
+    #  = torch.mean(y, dim=(0, 1), keepdim=True)
+    clim = clim.to(device=GT.device).unsqueeze(0)
+    pred = pred - clim
+    GT = GT - clim
+    pred_prime = pred - torch.mean(pred)
+    GT_prime = GT - torch.mean(GT)
+    loss = torch.sum(w_lat * pred_prime * GT_prime) / torch.sqrt(
+        torch.sum(w_lat * pred_prime**2) * torch.sum(w_lat * GT_prime**2)
+    )
+    return loss

src/module.py ADDED Viewed

	@@ -0,0 +1,168 @@

+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+# credits: https://github.com/ashleve/lightning-hydra-template/blob/main/src/models/mnist_module.py
+from typing import Any
+import os
+import numpy as np
+import torch
+from pytorch_lightning import LightningModule
+from torchvision.transforms import transforms
+from lr_scheduler import LinearWarmupCosineAnnealingLR
+from arch import Network
+from metric import (
+    MSE,RMSE,MAE,ACC,WMSE,WRMSE
+)
+class WeatherForecastModule(LightningModule):
+    """Lightning module for global forecasting with the ClimaX model.
+    Args:
+        net: Deeplearning model.
+        pretrained_path (str, optional): Path to pre-trained checkpoint.
+        lr (float, optional): Learning rate.
+        beta_1 (float, optional): Beta 1 for AdamW.
+        beta_2 (float, optional): Beta 2 for AdamW.
+        weight_decay (float, optional): Weight decay for AdamW.
+        warmup_epochs (int, optional): Number of warmup epochs.
+        max_epochs (int, optional): Number of total epochs.
+        warmup_start_lr (float, optional): Starting learning rate for warmup.
+        eta_min (float, optional): Minimum learning rate.
+    """
+    def __init__(
+        self,
+        net: Network,
+        pretrained_path: str = "",
+        lr: float = 5e-4,
+        beta_1: float = 0.9,
+        beta_2: float = 0.99,
+        weight_decay: float = 1e-5,
+        warmup_epochs: int = 10000,
+        max_epochs: int = 200000,
+        warmup_start_lr: float = 1e-8,
+        eta_min: float = 1e-8,
+    ):
+        super().__init__()
+        self.save_hyperparameters(logger=True, ignore=["net"])
+        self.net = net
+        if len(pretrained_path) > 0:
+            self.load_pretrained_weights(pretrained_path)
+    def load_pretrained_weights(self, pretrained_path):
+        self.net.load_state_dict(torch.load(pretrained_path))
+    def set_path(self,path):
+        self.path = path
+    def set_size(self,rad_size,sat_size):
+        self.rad_size = rad_size
+        self.sat_size = sat_size
+    def set_lat(self):
+        lat = np.load(os.path.join(self.path,'sat_lat.npy'))
+        self.sat_lat = lat[lat.shape[-1]//2-self.sat_size//2:lat.shape[-1]//2+self.sat_size//2]
+        # self.sat_lat = np.load(os.path.join(self.path,'sat_lat.npy'))
+        # self.sat_clim = torch.from_numpy(np.load(os.path.join(self.path,'sat_clim.npz'))['total_precipitation'])
+    def set_clim(self):
+        ##########
+        rad_clim = np.load(os.path.join(self.path,'rad_clim.npz'))['precipitation']
+        sat_clim = np.load(os.path.join(self.path,'sat_clim.npz'))['total_precipitation']
+        self.rad_clim = torch.from_numpy(rad_clim)
+        self.sat_clim = torch.from_numpy(sat_clim)
+    def set_normalize(self):
+        self.rad_mean = np.load(os.path.join(self.path,'rad_mean.npz'))['precipitation']
+        self.rad_std  = np.load(os.path.join(self.path,'rad_std.npz'))['precipitation']
+        self.sat_mean = np.load(os.path.join(self.path,'sat_mean.npz'))['total_precipitation']
+        self.sat_std  = np.load(os.path.join(self.path,'sat_std.npz'))['total_precipitation']
+    def set_denormalize(self):
+        self.rad_denormalization = transforms.Normalize(-self.rad_mean/self.rad_std,1/self.rad_std)
+        self.sat_denormalization = transforms.Normalize(-self.sat_mean/self.sat_std,1/self.sat_std)
+    def training_step(self, batch: Any, batch_idx: int):
+        inp_rad, inp_sat, out_rad, out_sat = batch
+        pred_rad,pred_sat = self.net.forward(inp_rad,inp_sat)
+        loss = torch.nn.MSELoss()
+        loss_rad = loss(pred_rad,out_rad)
+        loss_sat = loss(pred_sat,out_sat)
+        loss_tot = loss_rad + loss_sat
+        self.log("train/rad", loss_rad, prog_bar=True, logger = True)
+        self.log("train/sat", loss_sat, prog_bar=True, logger = True)
+        self.log("train/mse", loss_tot, prog_bar=True, logger = True)
+        return loss_tot
+    def validation_step(self, batch: Any, batch_idx: int):
+        inp_rad, inp_sat, out_rad, out_sat = batch
+        pred_rad,pred_sat = self.net.forward(inp_rad,inp_sat)
+        loss = torch.nn.MSELoss()
+        with torch.no_grad():
+            loss_rad = loss(pred_rad,out_rad)
+            loss_sat = loss(pred_sat,out_sat)
+            loss_tot = loss_rad + loss_sat
+        self.log("val/rad", loss_rad, prog_bar=True, logger = True)
+        self.log("val/sat", loss_sat, prog_bar=True, logger = True)
+        self.log("val/mse", loss_tot, prog_bar=True, logger = True)
+        return loss_tot
+    def test_step(self, batch: Any, batch_idx: int):
+        inp_rad, inp_sat, out_rad, out_sat = batch
+        pred_rad,pred_sat = self.net.forward(inp_rad,inp_sat)
+        loss = torch.nn.MSELoss()
+        self.rad_denormalization(out_rad)
+        rad_metric = [MSE,RMSE,ACC,MAE]
+        sat_metric = [MSE,WMSE,RMSE,WRMSE,ACC,MAE]
+        with torch.no_grad():
+            loss_rad = loss(self.rad_denormalization(pred_rad),self.rad_denormalization(out_rad))
+            loss_sat = loss(self.sat_denormalization(pred_sat),self.sat_denormalization(out_sat))
+            loss_tot = loss_rad + loss_sat
+            self.log(f"test/rad", loss_rad, prog_bar=True, logger = True)
+            self.log("test/sat", loss_sat, prog_bar=True, logger = True)
+            self.log("test/mse", loss_tot, prog_bar=True, logger = True)
+        for met in rad_metric:
+            loss_rad = met(
+                self.rad_denormalization(pred_rad),
+                self.rad_denormalization(out_rad),
+                np.ones(self.rad_size),
+                self.rad_clim
+            )
+            self.log(f"test/rad_{met.__name__}", loss_rad, prog_bar=True, logger = True)
+        for met in sat_metric:
+            loss_sat = met(
+                self.sat_denormalization(pred_sat),
+                self.sat_denormalization(out_sat),
+                self.sat_lat,
+                self.sat_clim,
+            )
+            self.log(f"test/sat_{met.__name__}", loss_sat, prog_bar=True, logger = True)
+        return loss_tot
+    def configure_optimizers(self):
+        decay = []
+        no_decay = []
+        for name, m in self.named_parameters():
+            if "var_embed" in name or "pos_embed" in name or "time_pos_embed" in name:
+                no_decay.append(m)
+            else:
+                decay.append(m)
+        optimizer = torch.optim.AdamW(
+            [
+                {
+                    "params": decay,
+                    "lr": self.hparams.lr,
+                    "betas": (self.hparams.beta_1, self.hparams.beta_2),
+                    "weight_decay": self.hparams.weight_decay,
+                },
+                {
+                    "params": no_decay,
+                    "lr": self.hparams.lr,
+                    "betas": (self.hparams.beta_1, self.hparams.beta_2),
+                    "weight_decay": 0,
+                },
+            ]
+        )
+        lr_scheduler = LinearWarmupCosineAnnealingLR(
+            optimizer,
+            self.hparams.warmup_epochs,
+            self.hparams.max_epochs,
+            self.hparams.warmup_start_lr,
+            self.hparams.eta_min,
+        )
+        scheduler = {"scheduler": lr_scheduler, "interval": "step", "frequency": 1}
+        return {"optimizer": optimizer, "lr_scheduler": scheduler}

src/rad_clim.py ADDED Viewed

	@@ -0,0 +1,23 @@

+import os
+import torch
+import math
+import numpy as np
+from copy import deepcopy
+path_load = '/data/weather2025/NhaBe/train/rad'
+path_save = '/data/weather2025/NhaBe'
+num = 0
+rad_clim = {}
+for name in os.listdir(path_load):
+    file = np.load(os.path.join(path_load,name))
+    for field in file.keys():
+        if(num == 0):
+            rad_clim[field] = file[field]
+        else:
+            rad_clim[field] = rad_clim[field] + file[field]
+    num += 1
+    print(num,end='\r')
+for field in rad_clim.keys():
+    rad_clim[field] = rad_clim[field]/num
+    rad_clim[field] = np.expand_dims(rad_clim[field],axis =0)
+    print(rad_clim[field].shape)
+np.savez(os.path.join(path_save,'rad_clim.npz'),**rad_clim)