File size: 3,017 Bytes
af44c09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
"""
Configuration module for biomass prediction model.
This recreates the config class needed for unpickling the model package.

Author: najahpokkiri
Date: 2025-05-17
"""
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Union, Tuple
from datetime import datetime

@dataclass
class BiomassPipelineConfig:
    """Configuration for the biomass prediction pipeline.

    Every field has a default, so ``BiomassPipelineConfig()`` produces the
    full-run configuration. Constructing with ``mode="test"`` makes
    ``__post_init__`` overwrite several fields with quick-run values.

    The field names, their order and their defaults are the class's
    interface (the generated ``__init__`` accepts them positionally), so
    they are kept stable.
    """

    # Core settings
    mode: str = "full"  # "test" triggers the quick-run overrides below
    random_seed: int = 42
    project_name: str = "biomass-prediction"
    created_by: str = "najahpokkiri"
    created_date: str = "2025-05-16"

    # Data paths
    # Each entry is a pair of GeoTIFF paths; presumably (input raster,
    # biomass reference raster) -- confirm against the code that reads them.
    raster_pairs: List[tuple] = field(default_factory=lambda: [
        ("/teamspace/studios/dl2/clean/data/s1_s2_l8_palsar_ch_dem_yellapur_2020.tif",
         "/teamspace/studios/dl2/clean/data/agbd_yellapur_reprojected_1.tif"),
        ("/teamspace/studios/dl2/clean/data/s1_s2_l8_palsar_ch_dem_betul_2020.tif",
         "/teamspace/studios/dl2/clean/data/agbd_betul_reprojected_1.tif"),
        ("/teamspace/studios/dl2/clean/data/s1_s2_l8_palsar_ch_goa_achankumar_2020_clipped.tif",
         "/teamspace/studios/dl2/clean/data/02_Achanakmar_AGB40_band1_onImgGrid.tif"),
        ("/teamspace/studios/dl2/clean/data/s1_s2_l8_palsar_ch_dem_Khaoyai_2020_clipped.tif",
         "/teamspace/studios/dl2/clean/data/05_Khaoyai_AGB40_band1_onImgGrid.tif")
    ])
    data_dir: str = "data/"
    # The timestamp is taken when the instance is created, so every instance
    # built without an explicit results_dir gets its own directory name.
    results_dir: str = field(default_factory=lambda: f"biomass_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}")

    # Feature engineering settings
    use_log_transform: bool = True
    epsilon: float = 1.0
    use_advanced_indices: bool = True
    use_texture_features: bool = True
    use_spatial_features: bool = True
    use_pca_features: bool = True
    pca_components: int = 25
    scale_method: str = "robust"
    outlier_removal: bool = True
    outlier_threshold: float = 3.0

    # Model settings
    model_type: str = "StableResNet"
    dropout_rate: float = 0.2
    batch_size: int = 256
    learning_rate: float = 0.001
    weight_decay: float = 1e-5
    max_epochs: int = 300
    patience: int = 30
    test_size: float = 0.15
    val_size: float = 0.15

    # Deployment settings
    huggingface_repo: str = "najahpokkiri/biomass-prediction"
    quantize_model: bool = False
    max_samples_per_tile: Optional[int] = None  # None means no cap is configured

    def __post_init__(self):
        """Apply the quick-run overrides when ``mode`` is ``"test"``.

        In test mode the fields assigned below are overwritten
        unconditionally, including values the caller passed explicitly.
        For any other mode every field is left exactly as given. In
        particular an explicit ``max_samples_per_tile`` is preserved rather
        than being reset to ``None``.
        """
        if self.mode != "test":
            # Nothing to adjust: max_samples_per_tile already defaults to
            # None, so resetting it here would only discard caller input.
            return

        # Quick testing settings
        self.raster_pairs = self.raster_pairs[:1]  # Use only first tile
        self.max_epochs = 10
        self.batch_size = 32
        self.use_texture_features = False
        self.use_spatial_features = False
        self.use_pca_features = False
        self.max_samples_per_tile = 5000
        self.pca_components = 10

# Module-level default configuration, built once at import time with every
# field left at its default (so results_dir carries the import-time timestamp).
default_config: BiomassPipelineConfig = BiomassPipelineConfig()