# Hosting-page residue kept for provenance (not part of the module):
#   pokkiri's picture
#   Create config.py
#   af44c09 verified
"""
Configuration module for biomass prediction model.
This recreates the config class needed for unpickling the model package.
Author: najahpokkiri
Date: 2025-05-17
"""
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Union, Tuple
from datetime import datetime
@dataclass
class BiomassPipelineConfig:
    """Configuration for the biomass prediction pipeline.

    Groups every tunable of the pipeline (data paths, feature engineering,
    model/training hyper-parameters, deployment options) in one dataclass.
    Per the module docstring, this class exists so that a pickled model
    package referencing it can be unpickled; field names should therefore be
    kept stable.

    ``mode`` selects a preset that is applied in ``__post_init__``:

    * ``"test"`` -- shrinks the run for quick smoke tests: only the first
      raster pair, 10 epochs, batch size 32, texture/spatial/PCA features
      disabled, 5000 samples per tile, 10 PCA components. These values
      overwrite whatever was passed to the constructor.
    * any other value (default ``"full"``) -- all fields keep the values they
      were constructed with, including an explicitly supplied
      ``max_samples_per_tile``.
    """

    # Core settings
    mode: str = "full"
    random_seed: int = 42
    project_name: str = "biomass-prediction"
    created_by: str = "najahpokkiri"
    created_date: str = "2025-05-16"

    # Data paths
    # Each entry is a pair of GeoTIFF paths. Judging by the file names these
    # are (predictor raster stack, biomass reference raster) -- confirm
    # against the data loader. The defaults are absolute paths and will only
    # resolve on a machine with the same directory layout.
    raster_pairs: List[Tuple[str, str]] = field(default_factory=lambda: [
        ("/teamspace/studios/dl2/clean/data/s1_s2_l8_palsar_ch_dem_yellapur_2020.tif",
         "/teamspace/studios/dl2/clean/data/agbd_yellapur_reprojected_1.tif"),
        ("/teamspace/studios/dl2/clean/data/s1_s2_l8_palsar_ch_dem_betul_2020.tif",
         "/teamspace/studios/dl2/clean/data/agbd_betul_reprojected_1.tif"),
        ("/teamspace/studios/dl2/clean/data/s1_s2_l8_palsar_ch_goa_achankumar_2020_clipped.tif",
         "/teamspace/studios/dl2/clean/data/02_Achanakmar_AGB40_band1_onImgGrid.tif"),
        ("/teamspace/studios/dl2/clean/data/s1_s2_l8_palsar_ch_dem_Khaoyai_2020_clipped.tif",
         "/teamspace/studios/dl2/clean/data/05_Khaoyai_AGB40_band1_onImgGrid.tif"),
    ])
    data_dir: str = "data/"
    # Evaluated once per instance at construction time, so every config gets
    # a directory name stamped with the moment it was created.
    results_dir: str = field(
        default_factory=lambda: f"biomass_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    )

    # Feature engineering settings
    use_log_transform: bool = True
    epsilon: float = 1.0
    use_advanced_indices: bool = True
    use_texture_features: bool = True
    use_spatial_features: bool = True
    use_pca_features: bool = True
    pca_components: int = 25
    scale_method: str = "robust"
    outlier_removal: bool = True
    outlier_threshold: float = 3.0

    # Model settings
    model_type: str = "StableResNet"
    dropout_rate: float = 0.2
    batch_size: int = 256
    learning_rate: float = 0.001
    weight_decay: float = 1e-5
    max_epochs: int = 300
    patience: int = 30
    test_size: float = 0.15
    val_size: float = 0.15

    # Deployment settings
    huggingface_repo: str = "najahpokkiri/biomass-prediction"
    quantize_model: bool = False
    # None means "no per-tile sample cap" as far as this class is concerned;
    # how consumers interpret it should be confirmed against the pipeline.
    max_samples_per_tile: Optional[int] = None

    def __post_init__(self) -> None:
        """Apply the quick-test preset when ``mode`` is ``"test"``.

        In test mode the preset values overwrite the corresponding fields.
        In every other mode nothing is changed, so values passed to the
        constructor (notably ``max_samples_per_tile``) are preserved instead
        of being reset to ``None``.
        """
        if self.mode != "test":
            return

        # Quick testing settings
        self.raster_pairs = self.raster_pairs[:1]  # Use only the first tile
        self.max_epochs = 10
        self.batch_size = 32
        self.use_texture_features = False
        self.use_spatial_features = False
        self.use_pca_features = False
        self.max_samples_per_tile = 5000
        self.pca_components = 10
# Module-level default configuration, created once when this module is
# imported with every field at its declared default (mode="full", so none of
# the test-mode overrides in __post_init__ are applied).
# NOTE: results_dir is timestamped at construction, so for this shared
# instance the timestamp reflects import time, not when the config is used.
default_config = BiomassPipelineConfig()