Halimhailey committed on
Commit ffa1f50 · verified · 1 Parent(s): f23946e

Upload folder using huggingface_hub

src/.DS_Store ADDED
Binary file (6.15 kB). View file
 
src/.env ADDED
@@ -0,0 +1,7 @@
1
+ POSTGRES_USER=halim
2
+ POSTGRES_PASSWORD=haileyhalimunicef
3
+ POSTGRES_DB=rosteroptimization
4
+ PGADMIN_EMAIL=admin@example.com
5
+ PGADMIN_PASSWORD=admin
6
+ DB_PORT=5432
7
+ PGADMIN_PORT=5050
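The variables in src/.env configure the Postgres database and pgAdmin for the stack. The commit does not show how they are consumed, so the following is only a minimal sketch, assuming the variables have been exported into the environment (for example via a docker-compose env_file); the helper name and the DB_HOST fallback are hypothetical and not part of this commit.

# Illustrative only: build a Postgres DSN from the variables defined in src/.env.
import os

def build_postgres_dsn() -> str:
    user = os.environ.get("POSTGRES_USER", "halim")
    password = os.environ.get("POSTGRES_PASSWORD", "")
    db = os.environ.get("POSTGRES_DB", "rosteroptimization")
    port = os.environ.get("DB_PORT", "5432")
    host = os.environ.get("DB_HOST", "localhost")  # DB_HOST is an assumption; it is not in the .env
    return f"postgresql://{user}:{password}@{host}:{port}/{db}"

if __name__ == "__main__":
    print(build_postgres_dsn())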
src/__init__.py ADDED
@@ -0,0 +1 @@
1
+
src/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (152 Bytes). View file
 
src/__pycache__/demand_filtering.cpython-310.pyc ADDED
Binary file (12.3 kB). View file
 
src/__pycache__/demand_validation_viz.cpython-310.pyc ADDED
Binary file (9.85 kB). View file
 
src/config/__init__.py ADDED
File without changes
src/config/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (159 Bytes). View file
 
src/config/__pycache__/constants.cpython-310.pyc ADDED
Binary file (5.3 kB). View file
 
src/config/__pycache__/optimization_config.cpython-310.pyc ADDED
Binary file (12.3 kB). View file
 
src/config/constants.py ADDED
@@ -0,0 +1,202 @@
1
+ """
2
+ Constants module for Supply Roster Optimization Tool
3
+ Replaces hard-coded magic numbers with meaningful named constants
4
+ """
5
+ from src.preprocess import extract
6
+
7
+ class ShiftType:
8
+ """
9
+ Shift type constants to replace magic numbers
10
+ 1 = Regular, 2 = Evening, 3 = Overtime
11
+ """
12
+ REGULAR = 1
13
+ EVENING = 2
14
+ OVERTIME = 3
15
+
16
+ # All available shifts
17
+ ALL_SHIFTS = [REGULAR, EVENING, OVERTIME]
18
+
19
+ # Common shift combinations
20
+ REGULAR_AND_OVERTIME = [REGULAR, OVERTIME] # Normal mode (no evening)
21
+
22
+ @classmethod
23
+ def get_name(cls, shift_id):
24
+ """Get human-readable name for shift ID"""
25
+ names = {
26
+ cls.REGULAR: "Regular",
27
+ cls.EVENING: "Evening",
28
+ cls.OVERTIME: "Overtime"
29
+ }
30
+ return names.get(shift_id, "Unknown")
31
+
32
+ @classmethod
33
+ def get_all_names(cls):
34
+ """Get dictionary mapping shift IDs to names"""
35
+ return {
36
+ cls.REGULAR: "Regular",
37
+ cls.EVENING: "Evening",
38
+ cls.OVERTIME: "Overtime"
39
+ }
40
+
41
+ class LineType:
42
+ """
43
+ Line type constants to replace magic numbers
44
+ 6 = Long Line, 7 = Mini Load
45
+ """
46
+ LONG_LINE = 6
47
+ MINI_LOAD = 7
48
+
49
+ # All available line types
50
+ ALL_LINE_TYPES = [LONG_LINE, MINI_LOAD]
51
+
52
+ @classmethod
53
+ def get_name(cls, line_id):
54
+ """Get human-readable name for line type ID"""
55
+ names = {
56
+ cls.LONG_LINE: "Long Line",
57
+ cls.MINI_LOAD: "Mini Load"
58
+ }
59
+ return names.get(line_id, "Unknown")
60
+
61
+ @classmethod
62
+ def get_all_names(cls):
63
+ """Get dictionary mapping line type IDs to names"""
64
+ return {
65
+ cls.LONG_LINE: "Long Line",
66
+ cls.MINI_LOAD: "Mini Load"
67
+ }
68
+
69
+ class KitLevel:
70
+ """
71
+ Kit hierarchy level constants
72
+ 0 = Prepack, 1 = Subkit, 2 = Master
73
+ """
74
+ PREPACK = 0
75
+ SUBKIT = 1
76
+ MASTER = 2
77
+
78
+ # All available levels
79
+ ALL_LEVELS = [PREPACK, SUBKIT, MASTER]
80
+
81
+ @classmethod
82
+ def get_name(cls, level_id):
83
+ """Get human-readable name for kit level ID"""
84
+ names = {
85
+ cls.PREPACK: "prepack",
86
+ cls.SUBKIT: "subkit",
87
+ cls.MASTER: "master"
88
+ }
89
+ return names.get(level_id, "unknown")
90
+
91
+ @classmethod
92
+ def get_all_names(cls):
93
+ """Get dictionary mapping level IDs to names"""
94
+ return {
95
+ cls.PREPACK: "prepack",
96
+ cls.SUBKIT: "subkit",
97
+ cls.MASTER: "master"
98
+ }
99
+
100
+ # Removed get_timing_weight method - no longer needed
101
+ # Dependency ordering is now handled by topological sorting
102
+
103
+ class PaymentMode:
104
+ """
105
+ Payment mode constants
106
+ """
107
+ BULK = "bulk"
108
+ PARTIAL = "partial"
109
+
110
+ @classmethod
111
+ def get_all_modes(cls):
112
+ """Get all available payment modes"""
113
+ return [cls.BULK, cls.PARTIAL]
114
+
115
+ # Default configurations using constants
116
+ class DefaultConfig:
117
+ """Default configuration values using constants"""
118
+
119
+ # Default payment modes by shift
120
+ PAYMENT_MODE_CONFIG = {
121
+ ShiftType.REGULAR: PaymentMode.BULK,
122
+ ShiftType.EVENING: PaymentMode.BULK,
123
+ ShiftType.OVERTIME: PaymentMode.PARTIAL
124
+ }
125
+
126
+ # Default max hours per shift per person
127
+ MAX_HOUR_PER_SHIFT_PER_PERSON = {
128
+ ShiftType.REGULAR: 7.5,
129
+ ShiftType.EVENING: 7.5,
130
+ ShiftType.OVERTIME: 5
131
+ }
132
+
133
+ # Default max parallel workers per line type
134
+ MAX_PARALLEL_WORKERS = {
135
+ LineType.LONG_LINE: 15,
136
+ LineType.MINI_LOAD: 15
137
+ }
138
+
139
+ # Default minimum UNICEF fixed-term employees per day
140
+ FIXED_MIN_UNICEF_PER_DAY = 2
141
+
142
+ # Default line counts
143
+ LINE_COUNT_LONG_LINE = 3
144
+ LINE_COUNT_MINI_LOAD = 2
145
+
146
+ # Default max parallel workers per line (for UI)
147
+ MAX_PARALLEL_WORKERS_LONG_LINE = 7
148
+ MAX_PARALLEL_WORKERS_MINI_LOAD = 5
149
+
150
+ # Default cost rates (example values)
151
+ DEFAULT_COST_RATES = {
152
+ "UNICEF Fixed term": {
153
+ ShiftType.REGULAR: 43.27,
154
+ ShiftType.EVENING: 43.27,
155
+ ShiftType.OVERTIME: 64.91
156
+ },
157
+ "Humanizer": {
158
+ ShiftType.REGULAR: 27.94,
159
+ ShiftType.EVENING: 27.94,
160
+ ShiftType.OVERTIME: 41.91
161
+ }
162
+ }
163
+ # Get employee type list from data files
164
+ EMPLOYEE_TYPE_LIST = extract.read_employee_data()["employment_type"].unique().tolist()
165
+ SHIFT_LIST = extract.get_shift_info()["id"].unique().tolist()
166
+ EVENING_SHIFT_MODE = "normal"
167
+ EVENING_SHIFT_DEMAND_THRESHOLD = 0.9
168
+
169
+ # Default schedule type
170
+ SCHEDULE_TYPE = "weekly"
171
+
172
+ # Default fixed staff mode
173
+ FIXED_STAFF_MODE = "priority"
174
+
175
+ # Default hourly rates for UI (simplified)
176
+ UNICEF_RATE_SHIFT_1 = 12.5
177
+ UNICEF_RATE_SHIFT_2 = 15.0
178
+ UNICEF_RATE_SHIFT_3 = 18.75
179
+ HUMANIZER_RATE_SHIFT_1 = 10.0
180
+ HUMANIZER_RATE_SHIFT_2 = 12.0
181
+ HUMANIZER_RATE_SHIFT_3 = 15.0
182
+ LINE_LIST = extract.read_packaging_line_data()["id"].unique().tolist()
183
+ LINE_CNT_PER_TYPE = extract.read_packaging_line_data().set_index("id")["line_count"].to_dict()
184
+
185
+ # Dynamic method to get max employee per type on day
186
+ @staticmethod
187
+ def get_max_employee_per_type_on_day(date_span):
188
+ """Get max employee per type configuration for given date span"""
189
+ return {
190
+ "UNICEF Fixed term": {
191
+ t: 8 for t in date_span
192
+ },
193
+ "Humanizer": {
194
+ t: 10 for t in date_span
195
+ }
196
+ }
197
+ MAX_UNICEF_PER_DAY = 8
198
+ MAX_HUMANIZER_PER_DAY = 10
199
+ MAX_HOUR_PER_PERSON_PER_DAY = 14
200
+ KIT_LEVELS, KIT_DEPENDENCIES, PRODUCTION_PRIORITY_ORDER = extract.get_production_order_data()
201
+
202
+
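The PaymentMode constants work together with DefaultConfig.MAX_HOUR_PER_SHIFT_PER_PERSON and DEFAULT_COST_RATES: "bulk" pays the full shift as soon as any hours are worked, while "partial" pays only the hours actually worked (see get_payment_mode_config in optimization_config.py). A small self-contained sketch of that pricing rule follows; the literal rates and hours are copied from the defaults above, and the shift_cost helper itself is illustrative, not part of the module.

# Illustrative sketch of the bulk/partial payment semantics; values copied from DefaultConfig.
REGULAR, EVENING, OVERTIME = 1, 2, 3
MAX_HOURS = {REGULAR: 7.5, EVENING: 7.5, OVERTIME: 5}
RATES = {
    "UNICEF Fixed term": {REGULAR: 43.27, EVENING: 43.27, OVERTIME: 64.91},
    "Humanizer": {REGULAR: 27.94, EVENING: 27.94, OVERTIME: 41.91},
}
PAYMENT_MODE = {REGULAR: "bulk", EVENING: "bulk", OVERTIME: "partial"}

def shift_cost(emp_type: str, shift: int, hours_worked: float) -> float:
    """Bulk mode pays the full shift once any hours are worked; partial pays actual hours."""
    rate = RATES[emp_type][shift]
    paid_hours = MAX_HOURS[shift] if PAYMENT_MODE[shift] == "bulk" else hours_worked
    return rate * paid_hours

print(shift_cost("Humanizer", REGULAR, 3))   # 27.94 * 7.5 = 209.55 (bulk)
print(shift_cost("Humanizer", OVERTIME, 3))  # 41.91 * 3   = 125.73 (partial)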
src/config/optimization_config.py ADDED
@@ -0,0 +1,450 @@
1
+ import pandas as pd
2
+ import src.preprocess.transform as transformed_data
3
+ import datetime
4
+ from datetime import timedelta
5
+ import src.preprocess.extract as extract
6
+ from src.config.constants import ShiftType, LineType, KitLevel, DefaultConfig
7
+
8
+ # Re-import all the packages
9
+ import importlib
10
+
11
+ # Reload modules to get latest changes - REMOVED to prevent infinite loops
12
+ # importlib.reload(extract)
13
+ # importlib.reload(transformed_data) # Uncomment if needed
14
+
15
+
16
+ def get_date_span():
17
+ """Get date span from streamlit session state, or return default"""
18
+ try:
19
+ import streamlit as st
20
+ if hasattr(st, 'session_state'):
21
+ # Get from session state without printing (avoid spam)
22
+ if 'start_date' in st.session_state and 'planning_days' in st.session_state:
23
+ from datetime import datetime, timedelta
24
+ start_date = datetime.combine(st.session_state.start_date, datetime.min.time())
25
+ planning_days = st.session_state.planning_days
26
+ end_date = start_date + timedelta(days=planning_days - 1)
27
+ date_span = list(range(1, planning_days + 1))
28
+ return date_span, start_date, end_date
29
+ except Exception:
30
+ pass
31
+
32
+ # Default values - no printing to avoid spam
33
+ from datetime import datetime
34
+ return list(range(1, 6)), datetime(2025, 7, 7), datetime(2025, 7, 11)
35
+
36
+
37
+ # Only call get_date_span() when explicitly needed - avoid module-level execution
38
+ # DATE_SPAN, start_date, end_date = get_date_span() # REMOVED - called dynamically instead
39
+ DATE_SPAN = None
40
+ start_date = None
41
+ end_date = None
42
+
43
+ def get_product_list():
44
+ """Get filtered product list without printing spam"""
45
+ try:
46
+ from src.demand_filtering import DemandFilter
47
+ filter_instance = DemandFilter()
48
+ filter_instance.load_data(force_reload=True)
49
+ return filter_instance.get_filtered_product_list()
50
+ except Exception:
51
+ # Fallback: get from session state start_date
52
+ date_span, start_date, end_date = get_date_span()
53
+ return transformed_data.get_released_product_list(start_date)
54
+
55
+
56
+ def get_employee_type_list():
57
+ """Get employee type list from session state or default"""
58
+ try:
59
+ import streamlit as st
60
+ if hasattr(st, 'session_state') and 'selected_employee_types' in st.session_state:
61
+ return st.session_state.selected_employee_types
62
+ except Exception:
63
+ pass
64
+
65
+ # Default: load from data files
66
+ employee_type_list = extract.read_employee_data()
67
+ return employee_type_list["employment_type"].unique().tolist()
68
+
69
+
70
+ def get_shift_list():
71
+ """Get shift list from session state or default"""
72
+ try:
73
+ import streamlit as st
74
+ if hasattr(st, 'session_state') and 'selected_shifts' in st.session_state:
75
+ return st.session_state.selected_shifts
76
+ except Exception:
77
+ pass
78
+
79
+ # Default: load from data files
80
+ shift_list = extract.get_shift_info()
81
+ return shift_list["id"].unique().tolist()
82
+
83
+ # Evening shift activation mode - define early to avoid circular dependency
84
+ # Options:
85
+ # "normal" - Only use regular shift (1) and overtime shift (3) - NO evening shift
86
+ # "activate_evening" - Allow evening shift (2) when demand is too high or cost-effective
87
+ # "always_available" - Evening shift always available as option
88
+ EVENING_SHIFT_MODE = "normal" # Default: only regular + overtime
89
+
90
+ # Evening shift activation threshold
91
+ # If demand cannot be met with regular + overtime, suggest evening shift activation
92
+ EVENING_SHIFT_DEMAND_THRESHOLD = 0.9 # Activate if regular+overtime capacity < 90% of demand
93
+
94
+ #Where?
95
+ def get_active_shift_list():
96
+ """
97
+ Get the list of active shifts based on EVENING_SHIFT_MODE setting.
98
+ """
99
+ all_shifts = get_shift_list()
100
+
101
+ if EVENING_SHIFT_MODE == "normal":
102
+ # Only regular and overtime shifts - NO evening shift
103
+ active_shifts = [s for s in all_shifts if s in ShiftType.REGULAR_AND_OVERTIME]
104
+ print(f"[SHIFT MODE] Normal mode: Using shifts {active_shifts} (Regular + Overtime only, NO evening)")
105
+
106
+ elif EVENING_SHIFT_MODE == "activate_evening":
107
+ # All shifts including evening (2)
108
+ active_shifts = list(all_shifts)
109
+ print(f"[SHIFT MODE] Evening activated: Using all shifts {active_shifts}")
110
+
111
+ elif EVENING_SHIFT_MODE == "always_available":
112
+ # All shifts always available
113
+ active_shifts = list(all_shifts)
114
+ print(f"[SHIFT MODE] Always available: Using all shifts {active_shifts}")
115
+
116
+ else:
117
+ # Default to normal mode
118
+ active_shifts = [s for s in all_shifts if s in ShiftType.REGULAR_AND_OVERTIME]
119
+ print(f"[SHIFT MODE] Unknown mode '{EVENING_SHIFT_MODE}', defaulting to normal: {active_shifts}")
120
+
121
+ return active_shifts
122
+
123
+ # DO NOT load at import time - always call get_active_shift_list() dynamically
124
+ # SHIFT_LIST = get_active_shift_list() # REMOVED - was causing stale data!
125
+
126
+ #where?
127
+ def get_line_list():
128
+ """Get line list - try from streamlit session state first, then from data files"""
129
+ try:
130
+ # Try to get from streamlit session state (from Dataset Metadata page)
131
+ import streamlit as st
132
+ if hasattr(st, 'session_state') and 'selected_lines' in st.session_state:
133
+ print(f"Using lines from Dataset Metadata page: {st.session_state.selected_lines}")
134
+ return st.session_state.selected_lines
135
+ except Exception as e:
136
+ print(f"Could not get lines from streamlit session: {e}")
137
+
138
+ # Default: load from data files
139
+ print(f"Loading line list from data files")
140
+ line_df = extract.read_packaging_line_data()
141
+ line_list = line_df["id"].unique().tolist()
142
+ return line_list
143
+
144
+ # DO NOT load at import time - always call get_line_list() dynamically
145
+ # LINE_LIST = get_line_list() # REMOVED - was causing stale data!
146
+
147
+ #where?
148
+ def get_kit_line_match():
149
+ kit_line_match = extract.read_kit_line_match_data()
150
+ kit_line_match_dict = kit_line_match.set_index("kit_name")["line_type"].to_dict()
151
+
152
+ # Create line name to ID mapping
153
+ line_name_to_id = {
154
+ "long line": LineType.LONG_LINE,
155
+ "mini load": LineType.MINI_LOAD,
156
+ "miniload": LineType.MINI_LOAD, # Alternative naming (no space)
157
+ "Long_line": LineType.LONG_LINE, # Alternative naming
158
+ "Mini_load": LineType.MINI_LOAD, # Alternative naming
159
+ }
160
+
161
+ # Convert string line names to numeric IDs
162
+ converted_dict = {}
163
+ for kit, line_name in kit_line_match_dict.items():
164
+ if isinstance(line_name, str) and line_name.strip():
165
+ # Convert string names to numeric IDs
166
+ line_id = line_name_to_id.get(line_name.strip(), None)
167
+ if line_id is not None:
168
+ converted_dict[kit] = line_id
169
+ else:
170
+ print(f"Warning: Unknown line type '{line_name}' for kit {kit}")
171
+ # Default to long line if unknown
172
+ converted_dict[kit] = LineType.LONG_LINE
173
+ elif isinstance(line_name, (int, float)) and not pd.isna(line_name):
174
+ # Already numeric
175
+ converted_dict[kit] = int(line_name)
176
+ else:
177
+ # Missing or empty line type - skip (no production needed for non-standalone masters)
178
+ pass # Don't add to converted_dict - these kits won't have line assignments
179
+
180
+ return converted_dict
181
+
182
+ KIT_LINE_MATCH_DICT = get_kit_line_match()
183
+
184
+
185
+ def get_line_cnt_per_type():
186
+ try:
187
+ # Try to get from streamlit session state (from config page)
188
+ import streamlit as st
189
+ if hasattr(st, 'session_state') and 'line_counts' in st.session_state:
190
+ print(f"Using line counts from config page: {st.session_state.line_counts}")
191
+ return st.session_state.line_counts
192
+ except Exception as e:
193
+ print(f"Could not get line counts from streamlit session: {e}")
194
+
195
+ print(f"Loading default line count values from data files")
196
+ line_df = extract.read_packaging_line_data()
197
+ line_cnt_per_type = line_df.set_index("id")["line_count"].to_dict()
198
+ print("line cnt per type", line_cnt_per_type)
199
+ return line_cnt_per_type
200
+
201
+ # DO NOT load at import time - always call get_line_cnt_per_type() dynamically
202
+ # LINE_CNT_PER_TYPE = get_line_cnt_per_type() # REMOVED - was causing stale data!
203
+
204
+ #where?
205
+ def get_demand_dictionary(force_reload=False):
206
+ """
207
+ Get filtered demand dictionary.
208
+ IMPORTANT: This dynamically loads data to reflect current Streamlit configs/dates.
209
+ """
210
+ try:
211
+ # Always get fresh filtered demand to reflect current configs
212
+ from src.demand_filtering import DemandFilter
213
+ filter_instance = DemandFilter()
214
+
215
+ # Force reload data to pick up new dates/configs
216
+ filter_instance.load_data(force_reload=True)
217
+
218
+ demand_dictionary = filter_instance.get_filtered_demand_dictionary()
219
+ print(f"📈 FRESH FILTERED DEMAND: {len(demand_dictionary)} products with total demand {sum(demand_dictionary.values())}")
220
+ print(f"🔄 LOADED DYNAMICALLY: Reflects current Streamlit configs")
221
+ return demand_dictionary
222
+ except Exception as e:
223
+ print(f"Error loading dynamic demand dictionary: {e}")
224
+ raise Exception("Demand dictionary not found with error:"+str(e))
225
+
226
+ # DO NOT load at import time - always call get_demand_dictionary() dynamically
227
+ # DEMAND_DICTIONARY = get_demand_dictionary() # REMOVED - was causing stale data!
228
+
229
+ #delete as already using default cost rates
230
+ def get_cost_list_per_emp_shift():
231
+ try:
232
+ # Try to get from streamlit session state (from config page)
233
+ import streamlit as st
234
+ if hasattr(st, 'session_state') and 'cost_list_per_emp_shift' in st.session_state:
235
+ print(f"Using cost list from config page: {st.session_state.cost_list_per_emp_shift}")
236
+ return st.session_state.cost_list_per_emp_shift
237
+ except Exception as e:
238
+ print(f"Could not get cost list from streamlit session: {e}")
239
+
240
+ print(f"Loading default cost values")
241
+ # Default hourly rates - Important: multiple employment types with different costs
242
+ return DefaultConfig.DEFAULT_COST_RATES
243
+
244
+ def shift_code_to_name():
245
+ return ShiftType.get_all_names()
246
+
247
+ def line_code_to_name():
248
+ """Convert line type IDs to readable names"""
249
+ return LineType.get_all_names()
250
+
251
+ # DO NOT load at import time - always call get_cost_list_per_emp_shift() dynamically
252
+ # COST_LIST_PER_EMP_SHIFT = get_cost_list_per_emp_shift() # REMOVED - was causing stale data!
253
+
254
+
255
+
256
+ # COST_LIST_PER_EMP_SHIFT = { # WH_Workforce_Hourly_Pay_Scale
257
+ # "Fixed": {1: 0, 2: 22, 3: 18},
258
+ # "Humanizer": {1: 10, 2: 10, 3: 10},
259
+ # }
260
+
261
+
262
+
263
+
264
+
265
+
266
+ #where to put?
267
+ def get_team_requirements(product_list=None):
268
+ """
269
+ Extract team requirements from Kits Calculation CSV.
270
+ Returns dictionary with employee type as key and product requirements as nested dict.
271
+ """
272
+ if product_list is None:
273
+ product_list = get_product_list() # Get fresh product list
274
+
275
+
276
+ kits_df = extract.read_personnel_requirement_data()
277
+
278
+ team_req_dict = {
279
+ "UNICEF Fixed term": {},
280
+ "Humanizer": {}
281
+ }
282
+
283
+ # Process each product in the product list
284
+ for product in product_list:
285
+ print("product",product)
286
+ print(f"Processing team requirements for product: {product}")
287
+ product_data = kits_df[kits_df['Kit'] == product]
288
+ print("product_data",product_data)
289
+ if not product_data.empty:
290
+ # Extract Humanizer and UNICEF staff requirements
291
+ humanizer_req = product_data["Humanizer"].iloc[0]
292
+ unicef_req = product_data["UNICEF staff"].iloc[0]
293
+
294
+ # Convert to int (data is already cleaned in extract function)
295
+ team_req_dict["Humanizer"][product] = int(humanizer_req)
296
+ team_req_dict["UNICEF Fixed term"][product] = int(unicef_req)
297
+ else:
298
+ print(f"Warning: Product {product} not found in Kits Calculation data, setting requirements to 0")
299
+
300
+
301
+ return team_req_dict
302
+
303
+
304
+
305
+ def get_max_employee_per_type_on_day():
306
+ try:
307
+ # Try to get from streamlit session state (from config page)
308
+ import streamlit as st
309
+ if hasattr(st, 'session_state') and 'max_employee_per_type_on_day' in st.session_state:
310
+ print(f"Using max employee counts from config page: {st.session_state.max_employee_per_type_on_day}")
311
+ return st.session_state.max_employee_per_type_on_day
312
+ except Exception as e:
313
+ print(f"Could not get max employee counts from streamlit session: {e}")
314
+
315
+ print(f"Loading default max employee values")
316
+ # Get date span dynamically if not available
317
+ if DATE_SPAN is None:
318
+ date_span, _, _ = get_date_span()
319
+ else:
320
+ date_span = DATE_SPAN
321
+
322
+ max_employee_per_type_on_day = {
323
+ "UNICEF Fixed term": {
324
+ t: 8 for t in date_span
325
+ },
326
+ "Humanizer": {
327
+ t: 10 for t in date_span
328
+ }
329
+ }
330
+ return max_employee_per_type_on_day
331
+
332
+
333
+ # Keep the constant for backward compatibility, but use function instead
334
+ MAX_HOUR_PER_PERSON_PER_DAY = 14 # legal standard
335
+ def get_max_hour_per_shift_per_person():
336
+ """Get max hours per shift per person from session state or default"""
337
+ try:
338
+ import streamlit as st
339
+ if hasattr(st, 'session_state'):
340
+ # Build from individual session state values
341
+ max_hours = {
342
+ ShiftType.REGULAR: st.session_state.get('max_hours_shift_1', DefaultConfig.MAX_HOUR_PER_SHIFT_PER_PERSON[ShiftType.REGULAR]),
343
+ ShiftType.EVENING: st.session_state.get('max_hours_shift_2', DefaultConfig.MAX_HOUR_PER_SHIFT_PER_PERSON[ShiftType.EVENING]),
344
+ ShiftType.OVERTIME: st.session_state.get('max_hours_shift_3', DefaultConfig.MAX_HOUR_PER_SHIFT_PER_PERSON[ShiftType.OVERTIME])
345
+ }
346
+ return max_hours
347
+ except Exception as e:
348
+ print(f"Could not get max hours per shift from session: {e}")
349
+
350
+ # Fallback to default
351
+ return DefaultConfig.MAX_HOUR_PER_SHIFT_PER_PERSON
352
+
353
+
354
+
355
+ # Keep these complex getters that access DefaultConfig or have complex logic:
356
+ def get_evening_shift_demand_threshold():
357
+ """Get evening shift demand threshold from session state or default"""
358
+ try:
359
+ import streamlit as st
360
+ if hasattr(st, 'session_state'):
361
+ return st.session_state.get('evening_shift_threshold', DefaultConfig.EVENING_SHIFT_DEMAND_THRESHOLD)
362
+ except Exception as e:
363
+ print(f"Could not get evening shift threshold from session: {e}")
364
+
365
+ # Fallback to default
366
+ return DefaultConfig.EVENING_SHIFT_DEMAND_THRESHOLD
367
+
368
+
369
+ # ---- Kit Hierarchy for Production Ordering ----
370
+ def get_kit_hierarchy_data():
371
+ kit_levels, dependencies, priority_order = extract.get_production_order_data()
372
+
373
+ return kit_levels, dependencies, priority_order
374
+
375
+ KIT_LEVELS, KIT_DEPENDENCIES, PRODUCTION_PRIORITY_ORDER = get_kit_hierarchy_data()
376
+ print(f"Kit Hierarchy loaded: {len(KIT_LEVELS)} kits, Priority order: {len(PRODUCTION_PRIORITY_ORDER)} items")
377
+
378
+ def get_kit_levels():
379
+ """Get kit levels lazily - returns {kit_id: level} where 0=prepack, 1=subkit, 2=master"""
380
+ kit_levels, _, _ = get_kit_hierarchy_data()
381
+ return kit_levels
382
+
383
+ def get_kit_dependencies():
384
+ """Get kit dependencies lazily - returns {kit_id: [dependency_list]}"""
385
+ _, dependencies, _ = get_kit_hierarchy_data()
386
+ return dependencies
387
+
388
+ def get_max_parallel_workers():
389
+ """Get max parallel workers from session state or default"""
390
+ try:
391
+ import streamlit as st
392
+ if hasattr(st, 'session_state'):
393
+ # Build from individual session state values
394
+ max_parallel_workers = {
395
+ LineType.LONG_LINE: st.session_state.get('max_parallel_workers_long_line', DefaultConfig.MAX_PARALLEL_WORKERS_LONG_LINE),
396
+ LineType.MINI_LOAD: st.session_state.get('max_parallel_workers_mini_load', DefaultConfig.MAX_PARALLEL_WORKERS_MINI_LOAD)
397
+ }
398
+ return max_parallel_workers
399
+ except Exception as e:
400
+ print(f"Could not get max parallel workers from session: {e}")
401
+
402
+ # Fallback to default
403
+ return {
404
+ LineType.LONG_LINE: DefaultConfig.MAX_PARALLEL_WORKERS_LONG_LINE,
405
+ LineType.MINI_LOAD: DefaultConfig.MAX_PARALLEL_WORKERS_MINI_LOAD
406
+ }
407
+
408
+
409
+
410
+ def get_fixed_min_unicef_per_day():
411
+ """
412
+ Get fixed minimum UNICEF employees per day - try from streamlit session state first, then default
413
+ This ensures a minimum number of UNICEF fixed-term staff are present every working day
414
+ """
415
+ try:
416
+ import streamlit as st
417
+ if hasattr(st, 'session_state') and 'fixed_min_unicef_per_day' in st.session_state:
418
+ print(f"Using fixed minimum UNICEF per day from config page: {st.session_state.fixed_min_unicef_per_day}")
419
+ return st.session_state.fixed_min_unicef_per_day
420
+ except ImportError:
421
+ pass
422
+
423
+ # Fallback to default configuration
424
+ return DefaultConfig.FIXED_MIN_UNICEF_PER_DAY
425
+
426
+
427
+ def get_payment_mode_config():
428
+ """
429
+ Get payment mode configuration - try from streamlit session state first, then default values
430
+ Payment modes:
431
+ - "bulk": If employee works any hours in shift, pay for full shift hours
432
+ - "partial": Pay only for actual hours worked
433
+ """
434
+ try:
435
+ # Try to get from streamlit session state (from Dataset Metadata page)
436
+ import streamlit as st
437
+ if hasattr(st, 'session_state') and 'payment_mode_config' in st.session_state:
438
+ print(f"Using payment mode config from streamlit session: {st.session_state.payment_mode_config}")
439
+ return st.session_state.payment_mode_config
440
+ except Exception as e:
441
+ print(f"Could not get payment mode config from streamlit session: {e}")
442
+
443
+ # Default payment mode configuration
444
+ print(f"Loading default payment mode configuration")
445
+ payment_mode_config = DefaultConfig.PAYMENT_MODE_CONFIG
446
+
447
+ return payment_mode_config
448
+
449
+
450
+ print("✅ Module-level configuration functions defined (variables initialized dynamically)")
src/config/paths.yaml ADDED
@@ -0,0 +1,19 @@
1
+ # Data Paths Configuration
2
+ # All paths are relative to the project root directory
3
+
4
+ data:
5
+ # CSV data files
6
+ csv:
7
+ demand: "data/real_data_excel/converted_csv/COOIS_Planned_and_Released.csv"
8
+ kit_composition: "data/real_data_excel/converted_csv/Kit_Composition_and_relation_cleaned_with_line_type.csv"
9
+ workforce_pay_scale: "data/real_data_excel/converted_csv/WH_Workforce_Hourly_Pay_Scale_processed.csv"
10
+ work_shift: "data/real_data_excel/converted_csv/work_shift.csv"
11
+ work_center_capacity: "data/real_data_excel/converted_csv/Work_Centre_Capacity.csv"
12
+ work_center_capacity_processed: "data/real_data_excel/converted_csv/Work_Centre_Capacity_processed.csv"
13
+ material_master: "data/real_data_excel/converted_csv/Material_Master_WMS.csv"
14
+ kits_calculation: "data/real_data_excel/converted_csv/Kits__Calculation.csv"
15
+
16
+ # Hierarchy data
17
+ hierarchy:
18
+ kit_hierarchy: "data/hierarchy_exports/kit_hierarchy.json"
19
+
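paths.yaml centralizes the CSV and hierarchy file locations relative to the project root. The repository's actual loader is not shown in this commit, so the following is only a sketch of reading the file with PyYAML and resolving one entry, assuming PyYAML is installed and the working directory is the project root.

# Illustrative loader for src/config/paths.yaml (the extract module's real loader is not shown here).
from pathlib import Path
import yaml

with open("src/config/paths.yaml", "r") as f:
    paths = yaml.safe_load(f)

demand_csv = Path(paths["data"]["csv"]["demand"])
print(demand_csv)  # data/real_data_excel/converted_csv/COOIS_Planned_and_Released.csv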
src/demand_filtering.py ADDED
@@ -0,0 +1,413 @@
1
+ """
2
+ Demand Data Filtering Module
3
+
4
+ This module filters demand data to include only products that are ready for optimization.
5
+ Excludes products that:
6
+ 1. Have no line type assignments (non-standalone masters)
7
+ 2. Have zero staffing requirements (both Humanizer and UNICEF staff = 0)
8
+
9
+ The filtered data is used by the optimization system.
10
+ """
11
+
12
+ import pandas as pd
13
+ from typing import Dict, List, Tuple
14
+ from src.preprocess import extract
15
+
16
+
17
+ class DemandFilter:
18
+ """
19
+ Filters demand data to include only products ready for optimization
20
+ """
21
+
22
+ def __init__(self):
23
+ self.demand_data = None
24
+ self.kit_levels = None
25
+ self.kit_dependencies = None
26
+ self.line_assignments = None
27
+ self.team_requirements = None
28
+ self.speed_data = None
29
+
30
+ def load_data(self, force_reload=False):
31
+ """Load all necessary data for filtering"""
32
+ try:
33
+ # Skip loading if data already exists and not forcing reload
34
+ if not force_reload and self.demand_data is not None:
35
+ print("📊 Using cached filter data (set force_reload=True to refresh)")
36
+ return True
37
+
38
+ print("🔄 Loading fresh filtering data...")
39
+ # Get start date for demand data from optimization config
40
+ from src.config.optimization_config import get_date_span
41
+ date_span, start_date, end_date = get_date_span()
42
+ print(f"🗓️ DEMAND FILTERING DATE: Using {start_date.date() if start_date else 'None'} (same as optimization)")
43
+
44
+ # Load demand data directly from extract
45
+ demand_df = extract.read_orders_data(start_date=start_date)
46
+ self.demand_data = demand_df.groupby('Material Number')["Order quantity (GMEIN)"].sum().to_dict()
47
+
48
+ # Load kit hierarchy data
49
+ kit_levels, dependencies, _ = extract.get_production_order_data()
50
+ self.kit_levels = kit_levels
51
+ self.kit_dependencies = dependencies
52
+
53
+ # Load line assignments from kit line match data
54
+ kit_line_match = extract.read_kit_line_match_data()
55
+ kit_line_match_dict = kit_line_match.set_index("kit_name")["line_type"].to_dict()
56
+
57
+ # Convert string line names to numeric IDs
58
+ from src.config.constants import LineType
59
+ line_name_to_id = {
60
+ "long line": LineType.LONG_LINE,
61
+ "mini load": LineType.MINI_LOAD,
62
+ "miniload": LineType.MINI_LOAD,
63
+ "Long_line": LineType.LONG_LINE,
64
+ "Mini_load": LineType.MINI_LOAD,
65
+ }
66
+
67
+ self.line_assignments = {}
68
+ for kit, line_name in kit_line_match_dict.items():
69
+ if isinstance(line_name, str) and line_name.strip():
70
+ line_id = line_name_to_id.get(line_name.strip())
71
+ if line_id is not None:
72
+ self.line_assignments[kit] = line_id
73
+ elif isinstance(line_name, (int, float)) and not pd.isna(line_name):
74
+ self.line_assignments[kit] = int(line_name)
75
+
76
+ # Load team requirements from Kits Calculation data
77
+ kits_df = extract.read_personnel_requirement_data()
78
+ self.team_requirements = {
79
+ 'UNICEF Fixed term': kits_df.set_index('Kit')['UNICEF staff'].to_dict(),
80
+ 'Humanizer': kits_df.set_index('Kit')['Humanizer'].to_dict()
81
+ }
82
+
83
+ # Load production speed data
84
+ self.speed_data = extract.read_package_speed_data()
85
+
86
+ print(f"✅ Filtering data loaded: {len(self.demand_data)} products with demand, {len(self.speed_data)} with speed data")
87
+ return True
88
+
89
+ except Exception as e:
90
+ print(f"Error loading data for filtering: {str(e)}")
91
+ return False
92
+
93
+
94
+ def standalone_master_filter(self, product_id: str) -> Tuple[str, bool]:
95
+ """
96
+ Classify product type and check if it's a standalone master.
97
+
98
+ Returns:
99
+ Tuple[str, bool]: (product_type, is_standalone_master)
100
+ """
101
+ if product_id in self.kit_levels:
102
+ level = self.kit_levels[product_id]
103
+
104
+ if level == 0:
105
+ return "prepack", False
106
+ elif level == 1:
107
+ return "subkit", False
108
+ elif level == 2:
109
+ # Check if this master is standalone (no subkits/prepacks)
110
+ dependencies = self.kit_dependencies.get(product_id, [])
111
+ is_standalone = len(dependencies) == 0
112
+ return "master", is_standalone
113
+ else:
114
+ return "unknown", False
115
+ else:
116
+ return "unclassified", False
117
+
118
+ def _get_line_type_capacity(self, line_type: int) -> int:
119
+ """
120
+ Calculate the total capacity in hours for a specific line type.
121
+
122
+ Args:
123
+ line_type: The line type ID (e.g., 6 for Long Line, 7 for Mini Load)
124
+
125
+ Returns:
126
+ int: Total capacity in hours for this line type
127
+ """
128
+ from src.config.optimization_config import get_line_cnt_per_type, get_max_hour_per_shift_per_person, get_active_shift_list, get_date_span
129
+
130
+ line_cnt_per_type = get_line_cnt_per_type()
131
+ max_hours_per_shift_dict = get_max_hour_per_shift_per_person()
132
+ active_shifts = get_active_shift_list()
133
+ date_span, _, _ = get_date_span() # Get date span dynamically
134
+
135
+ # Get line count for this specific line type
136
+ line_count = line_cnt_per_type.get(line_type, 0)
137
+
138
+ # Calculate total hours per day (sum of all active shift hours)
139
+ total_hours_per_day = sum(max_hours_per_shift_dict.get(shift, 0) for shift in active_shifts)
140
+
141
+ # Calculate available capacity hours
142
+ # Available hours = line_count × total_hours_per_day × days_in_period
143
+ available_hours = line_count * total_hours_per_day * len(date_span)
144
+
145
+ return available_hours
146
+
147
+ def get_maximum_packaging_capacity(self) -> int:
148
+ """
149
+ Get the maximum packaging capacity across all line types.
150
+
151
+ Returns:
152
+ int: Maximum total capacity in hours across all lines
153
+ """
154
+ from src.config.optimization_config import get_line_cnt_per_type
155
+
156
+ line_cnt_per_type = get_line_cnt_per_type()
157
+ total_capacity = 0
158
+
159
+ for line_type, line_count in line_cnt_per_type.items():
160
+ if line_count > 0: # Only count active lines
161
+ line_capacity = self._get_line_type_capacity(line_type)
162
+ total_capacity += line_capacity
163
+
164
+ return total_capacity
165
+
166
+ def too_high_demand_filter(self, product_id: str) -> bool:
167
+ """
168
+ Check if the demand for a product is too high.
169
+
170
+ A product has "too high demand" when the total processing hours needed
171
+ exceeds the available capacity hours for the product's assigned line type.
172
+
173
+ NOTE: This method assumes all prerequisite data is available (demand > 0,
174
+ line assignment exists, speed data exists). The main filter function
175
+ should handle these edge cases.
176
+
177
+ Calculation:
178
+ - Processing hours needed = demand_quantity / production_speed_per_hour
179
+ - Available hours = line_count × hours_per_shift × shifts_per_day × days_in_period
180
+
181
+ Args:
182
+ product_id: The product ID to check
183
+
184
+ Returns:
185
+ bool: True if demand is too high (should be excluded), False otherwise
186
+ """
187
+ # Get demand for this product (assumes demand > 0, checked by main filter)
188
+ demand = self.demand_data.get(product_id, 0)
189
+ if demand <= 0:
190
+ return False
191
+ # Get line assignment for this product (assumes exists, checked by main filter)
192
+ if self.line_assignments is None or product_id not in self.line_assignments:
193
+ return False
194
+ line_type = self.line_assignments.get(product_id)
195
+
196
+ # Get production speed data (assumes exists, checked by main filter)
197
+ if self.speed_data is None or product_id not in self.speed_data:
198
+ return False
199
+ production_speed_per_hour = self.speed_data[product_id]
200
+
201
+ # Calculate processing hours needed
202
+ processing_hours_needed = demand / production_speed_per_hour
203
+
204
+ # Get available capacity for this specific line type
205
+ available_hours = self._get_line_type_capacity(line_type)
206
+
207
+ # Check if processing hours needed exceeds available capacity
208
+ is_too_high = processing_hours_needed > available_hours
209
+
210
+ if is_too_high:
211
+ print(f"⚠️ HIGH DEMAND WARNING: {product_id} needs {processing_hours_needed:.1f}h but only {available_hours:.1f}h available (line_type={line_type}, demand={demand}, speed={production_speed_per_hour:.1f}/h)")
212
+
213
+ return is_too_high
214
+
215
+ def is_product_ready_for_optimization(self, product_id: str) -> Tuple[bool, List[str]]:
216
+ """
217
+ Check if a single product is ready for optimization.
218
+ 1) Should have demand higher than 0
219
+ 2) Should be right type - standalone master, subkit, prepack
220
+ 3) Should have line assignment
221
+ 4) Should have staffing requirements
222
+ 5) Should have production speed data
223
+
224
+ Returns:
225
+ Tuple[bool, List[str]]: (is_ready, exclusion_reasons)
226
+ """
227
+ exclusion_reasons = []
228
+
229
+ # Check if product has positive demand
230
+ demand = self.demand_data.get(product_id, 0)
231
+ if demand <= 0:
232
+ exclusion_reasons.append("No demand or zero demand")
233
+
234
+ # Classify product type
235
+ product_type, is_standalone_master = self.standalone_master_filter(product_id)
236
+
237
+ # Check line assignment logic
238
+ has_line_assignment = product_id in self.line_assignments
239
+
240
+ # For masters: standalone should have line assignment, non-standalone should NOT
241
+
242
+ if product_type == "master":
243
+ if is_standalone_master:
244
+ if not has_line_assignment:
245
+ exclusion_reasons.append("Standalone master missing line assignment")
246
+ elif self.line_assignments.get(product_id) != 6: # 6 = LONG_LINE
247
+ exclusion_reasons.append("Standalone master should have long line assignment")
248
+ else:
249
+ # Non-standalone masters should NOT have line assignment (excluded from production)
250
+ exclusion_reasons.append("Non-standalone master (excluded from production)")
251
+ else:
252
+ # For subkits and prepacks, check normal line assignment
253
+ if not has_line_assignment:
254
+ exclusion_reasons.append("No line assignment")
255
+
256
+ # Check staffing requirements
257
+ unicef_staff = self.team_requirements.get('UNICEF Fixed term', {}).get(product_id, 0)
258
+ humanizer_staff = self.team_requirements.get('Humanizer', {}).get(product_id, 0)
259
+ total_staff = unicef_staff + humanizer_staff
260
+
261
+ if total_staff == 0:
262
+ exclusion_reasons.append("Zero staffing requirements")
263
+
264
+ # Check production speed data
265
+ if self.speed_data is None or product_id not in self.speed_data:
266
+ exclusion_reasons.append("Missing production speed data")
267
+
268
+ # Check if demand is too high (only if we have all required data)
269
+ if self.too_high_demand_filter(product_id):
270
+ exclusion_reasons.append("Demand exceeds available production capacity")
271
+
272
+
273
+
274
+
275
+ is_ready = len(exclusion_reasons) == 0
276
+ return is_ready, exclusion_reasons
277
+
278
+ def filter_products(self) -> Tuple[List[str], Dict[str, int], List[str], Dict[str, int]]:
279
+ """
280
+ Filter products into included and excluded lists based on optimization readiness.
281
+ Uses is_product_ready_for_optimization() to check all criteria.
282
+
283
+ Returns:
284
+ Tuple containing:
285
+ - included_products: List of product IDs ready for optimization
286
+ - included_demand: Dict of {product_id: demand} for included products
287
+ - excluded_products: List of product IDs excluded from optimization
288
+ - excluded_demand: Dict of {product_id: demand} for excluded products
289
+ """
290
+ if not self.load_data():
291
+ raise Exception("Failed to load data for filtering")
292
+
293
+ included_products = []
294
+ included_demand = {}
295
+ excluded_products = []
296
+ excluded_demand = {}
297
+ excluded_details = {}
298
+
299
+ for product_id, demand in self.demand_data.items():
300
+ is_ready, exclusion_reasons = self.is_product_ready_for_optimization(product_id)
301
+
302
+ if is_ready:
303
+ included_products.append(product_id)
304
+ included_demand[product_id] = demand
305
+ else:
306
+ excluded_products.append(product_id)
307
+ excluded_demand[product_id] = demand
308
+ excluded_details[product_id] = exclusion_reasons
309
+
310
+ # Sort products for consistent output
311
+ included_products.sort()
312
+ excluded_products.sort()
313
+ # Print data quality warnings for included products
314
+ included_without_hierarchy = sum(1 for pid in included_products if self.standalone_master_filter(pid)[0] == "unclassified")
315
+ if included_without_hierarchy > 0:
316
+ print(f"\n⚠️ DATA QUALITY WARNING: {included_without_hierarchy} included products missing hierarchy data")
317
+
318
+ return included_products, included_demand, excluded_products, excluded_demand
319
+
320
+ def get_filtered_product_list(self) -> List[str]:
321
+ """Get list of products ready for optimization"""
322
+ included_products, _, _, _ = self.filter_products()
323
+ return included_products
324
+
325
+ def get_filtered_demand_dictionary(self) -> Dict[str, int]:
326
+ """Get demand dictionary for products ready for optimization"""
327
+ _, included_demand, _, _ = self.filter_products()
328
+ return included_demand
329
+
330
+ def get_complete_product_analysis(self) -> Dict:
331
+ """Get complete analysis of all products for visualization"""
332
+ included_products, included_demand, excluded_products, excluded_demand = self.filter_products()
333
+
334
+ all_products = {**included_demand, **excluded_demand}
335
+ product_details = {}
336
+
337
+ # Load speed data for additional validation
338
+ speed_data = None
339
+ try:
340
+ from src.config import optimization_config
341
+ from src.preprocess import extract
342
+ speed_data = extract.read_package_speed_data()
343
+ except Exception as e:
344
+ print(f"Warning: Could not load speed data for analysis: {e}")
345
+
346
+ for product_id, demand in all_products.items():
347
+ product_type, is_standalone_master = self.standalone_master_filter(product_id)
348
+ is_ready, exclusion_reasons = self.is_product_ready_for_optimization(product_id)
349
+
350
+ # Get staffing info
351
+ unicef_staff = self.team_requirements.get('UNICEF Fixed term', {}).get(product_id, 0)
352
+ humanizer_staff = self.team_requirements.get('Humanizer', {}).get(product_id, 0)
353
+
354
+ # Get line assignment
355
+ line_assignment = self.line_assignments.get(product_id)
356
+
357
+ # Get production speed info
358
+ has_speed_data = speed_data is not None and product_id in speed_data
359
+
360
+ # too high demand
361
+ has_too_high_demand = self.too_high_demand_filter(product_id)
362
+
363
+ product_details[product_id] = {
364
+ 'demand': demand,
365
+ 'product_type': product_type,
366
+ 'is_standalone_master': is_standalone_master,
367
+ 'is_included_in_optimization': is_ready,
368
+ 'exclusion_reasons': exclusion_reasons,
369
+ 'unicef_staff': unicef_staff,
370
+ 'humanizer_staff': humanizer_staff,
371
+ 'total_staff': unicef_staff + humanizer_staff,
372
+ 'line_assignment': line_assignment,
373
+ 'has_line_assignment': line_assignment is not None,
374
+ 'has_staffing': (unicef_staff + humanizer_staff) > 0,
375
+ 'has_hierarchy': product_type != "unclassified",
376
+ 'has_speed_data': has_speed_data,
377
+ 'has_too_high_demand': has_too_high_demand
378
+ }
379
+
380
+ # Calculate data quality statistics for included products
381
+ included_without_speed = sum(1 for pid in included_products if not product_details[pid]['has_speed_data'])
382
+ included_without_hierarchy = sum(1 for pid in included_products if not product_details[pid]['has_hierarchy'])
383
+
384
+ # Count products excluded due to too high demand
385
+ excluded_with_too_high_demand = sum(1 for pid in excluded_products if product_details[pid]['has_too_high_demand'])
386
+ return {
387
+ 'included_count': len(included_products),
388
+ 'included_demand': sum(included_demand.values()),
389
+ 'excluded_count': len(excluded_products),
390
+ 'excluded_demand': sum(excluded_demand.values()),
391
+ 'total_products': len(all_products),
392
+ 'total_demand': sum(all_products.values()),
393
+ 'product_details': product_details,
394
+ 'standalone_masters_count': sum(1 for p in product_details.values() if p['is_standalone_master']),
395
+ 'included_products': included_products,
396
+ 'excluded_products': excluded_products,
397
+ # Data quality metrics for included products
398
+ 'included_missing_speed_count': included_without_speed,
399
+ 'included_missing_hierarchy_count': included_without_hierarchy,
400
+ 'excluded_with_too_high_demand_count': excluded_with_too_high_demand
401
+ }
402
+
403
+
404
+ # Test script when run directly
405
+
406
+ if __name__ == "__main__":
407
+ # Test the filtering
408
+ filter_instance = DemandFilter()
409
+ included_products, included_demand, excluded_products, excluded_demand = filter_instance.filter_products()
410
+
411
+ print(f"\n=== FILTERING TEST RESULTS ===")
412
+ print(f"Included products: {included_products[:5]}..." if len(included_products) > 5 else f"Included products: {included_products}")
413
+ print(f"Excluded products: {excluded_products[:5]}..." if len(excluded_products) > 5 else f"Excluded products: {excluded_products}")
src/demand_validation_viz.py ADDED
@@ -0,0 +1,278 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Demand Data Validation Visualization Module
4
+
5
+ Provides Streamlit visualization for demand data validation.
6
+ Shows which products are included/excluded from optimization and why.
7
+ """
8
+
9
+ import pandas as pd
10
+ import streamlit as st
11
+ from typing import Dict
12
+ from src.config.constants import LineType
13
+ from src.demand_filtering import DemandFilter
14
+
15
+
16
+ # Simple mapping for product level names
17
+ LEVEL_NAMES = {
18
+ 'prepack': 'prepack',
19
+ 'subkit': 'subkit',
20
+ 'master': {
21
+ 'standalone': 'standalone_master',
22
+ 'with_hierarchy': 'master_with_hierarchy'
23
+ },
24
+ 'unclassified': 'no_hierarchy_data'
25
+ }
26
+
27
+
28
+ class DemandValidationViz:
29
+ """
30
+ Simple visualization wrapper for demand filtering results.
31
+ All filtering logic is in DemandFilter - this just displays the results.
32
+ """
33
+
34
+ def __init__(self):
35
+ self.filter_instance = DemandFilter()
36
+ self.speed_data = None
37
+
38
+ def load_data(self):
39
+ """Load all data needed for visualization"""
40
+ try:
41
+ from src.config import optimization_config
42
+ from src.preprocess import extract
43
+ self.speed_data = extract.read_package_speed_data()
44
+ return self.filter_instance.load_data()
45
+ except Exception as e:
46
+ error_msg = f"Error loading data: {str(e)}"
47
+ print(error_msg)
48
+ if st:
49
+ st.error(error_msg)
50
+ return False
51
+
52
+ def validate_all_products(self) -> pd.DataFrame:
53
+ """
54
+ Create DataFrame with validation results for all products.
55
+ Main visualization method - converts filtering results to displayable format.
56
+ """
57
+ # Get analysis from filtering module
58
+ analysis = self.filter_instance.get_complete_product_analysis()
59
+ product_details = analysis['product_details']
60
+
61
+ results = []
62
+ for product_id, details in product_details.items():
63
+ # Calculate production hours if speed data available
64
+ speed = self.speed_data.get(product_id) if self.speed_data else None
65
+ production_hours = (details['demand'] / speed) if speed and speed > 0 else None
66
+
67
+ # Get line type name
68
+ line_type_id = details['line_assignment']
69
+ line_name = LineType.get_name(line_type_id) if line_type_id is not None else "no_assignment"
70
+
71
+ # Get level name (simplified)
72
+ ptype = details['product_type']
73
+ if ptype == 'unclassified':
74
+ level_name = LEVEL_NAMES['unclassified']
75
+ elif ptype == 'master':
76
+ level_name = LEVEL_NAMES['master']['standalone' if details['is_standalone_master'] else 'with_hierarchy']
77
+ else:
78
+ level_name = LEVEL_NAMES.get(ptype, f"level_{ptype}")
79
+
80
+ # Build validation status message
81
+ if not details['is_included_in_optimization']:
82
+ validation_status = f"🚫 Excluded: {', '.join(details['exclusion_reasons'])}"
83
+ else:
84
+ issues = []
85
+ if speed is None:
86
+ issues.append("missing_speed_data (will use default)")
87
+ if not details['has_hierarchy']:
88
+ issues.append("no_hierarchy_data")
89
+ validation_status = f"⚠️ Data Issues: {', '.join(issues)}" if issues else "✅ Ready for optimization"
90
+
91
+
92
+
93
+ if details['is_included_in_optimization'] and details['has_too_high_demand']:
94
+ issues.append("too_high_demand")
95
+ validation_status = f"⚠️ Data Issues: {', '.join(issues)}" if issues else "✅ Ready for optimization"
96
+ results.append({
97
+ 'Product ID': product_id,
98
+ 'Demand': details['demand'],
99
+ 'Product Type': ptype.title(),
100
+ 'Level': level_name,
101
+ 'Is Standalone Master': "Yes" if details['is_standalone_master'] else "No",
102
+ 'Line Type ID': line_type_id if line_type_id else "N/A",
103
+ 'Line Type': line_name,
104
+ 'UNICEF Staff': details['unicef_staff'],
105
+ 'Humanizer Staff': details['humanizer_staff'],
106
+ 'Total Staff': details['total_staff'],
107
+ 'Production Speed (units/hour)': f"{speed:.1f}" if speed else "N/A",
108
+ 'Production Hours Needed': f"{production_hours:.1f}" if production_hours else "N/A",
109
+ 'Has Line Assignment': "✅" if details['has_line_assignment'] else "❌",
110
+ 'Has Staffing Data': "✅" if details['has_staffing'] else "❌",
111
+ 'Has Speed Data': "✅" if speed is not None else "❌ (will use default)",
112
+ 'Has Hierarchy Data': "✅" if details['has_hierarchy'] else "❌",
113
+ 'Excluded from Optimization': not details['is_included_in_optimization'],
114
+ 'Exclusion Reasons': ', '.join(details['exclusion_reasons']) if details['exclusion_reasons'] else '',
115
+ 'Data Quality Issues': ', '.join(issues) if details['is_included_in_optimization'] and 'issues' in locals() and issues else '',
116
+ 'Has Too High Demand': "✅" if details['has_too_high_demand'] else "❌",
117
+ 'Validation Status': validation_status
118
+ })
119
+
120
+ df = pd.DataFrame(results)
121
+ df = df.sort_values(['Excluded from Optimization', 'Demand'], ascending=[False, False])
122
+ return df
123
+
124
+ def get_summary_statistics(self, df: pd.DataFrame) -> Dict:
125
+ """Calculate summary statistics from validation results"""
126
+ analysis = self.filter_instance.get_complete_product_analysis()
127
+ included_df = df[df['Excluded from Optimization'] == False]
128
+
129
+ return {
130
+ 'total_products': analysis['total_products'],
131
+ 'total_demand': analysis['total_demand'],
132
+ 'included_products': analysis['included_count'],
133
+ 'excluded_products': analysis['excluded_count'],
134
+ 'included_demand': analysis['included_demand'],
135
+ 'excluded_demand': analysis['excluded_demand'],
136
+ 'type_counts': df['Product Type'].value_counts().to_dict(),
137
+ 'no_line_assignment': len(included_df[included_df['Has Line Assignment'] == "❌"]),
138
+ 'no_staffing': len(included_df[included_df['Has Staffing Data'] == "❌"]),
139
+ 'no_speed': len(included_df[included_df['Has Speed Data'].str.contains("❌")]),
140
+ 'no_hierarchy': len(included_df[included_df['Has Hierarchy Data'] == "❌"]),
141
+ 'standalone_masters': analysis['standalone_masters_count'],
142
+ 'total_unicef_needed': sum(p['unicef_staff'] for p in analysis['product_details'].values()),
143
+ 'total_humanizer_needed': sum(p['humanizer_staff'] for p in analysis['product_details'].values()),
144
+ 'excluded_with_too_high_demand': analysis['excluded_with_too_high_demand_count']
145
+ }
146
+
147
+
148
+ def display_demand_validation():
149
+ """
150
+ Display demand validation analysis in Streamlit.
151
+ Main entry point for the validation page.
152
+ """
153
+ st.header("📋 Demand Data Validation")
154
+ st.markdown("Analysis showing which products are included/excluded from optimization and data quality status.")
155
+
156
+ # Load and analyze data
157
+ validator = DemandValidationViz()
158
+ with st.spinner("Loading and analyzing data..."):
159
+ if not validator.load_data():
160
+ st.error("Failed to load data for validation.")
161
+ return
162
+ validation_df = validator.validate_all_products()
163
+ stats = validator.get_summary_statistics(validation_df)
164
+
165
+ # ===== SUMMARY METRICS =====
166
+ st.subheader("📊 Summary Statistics")
167
+ col1, col2, col3, col4 = st.columns(4)
168
+ col1.metric("Total Products", stats['total_products'])
169
+ col1.metric("Included in Optimization", stats['included_products'], delta="Ready")
170
+ col2.metric("Total Demand", f"{stats['total_demand']:,}")
171
+ col2.metric("Excluded from Optimization", stats['excluded_products'], delta="Omitted")
172
+ col3.metric("Included Demand", f"{stats['included_demand']:,}", delta="Will be optimized")
173
+ col3.metric("UNICEF Staff Needed", stats['total_unicef_needed'])
174
+ col4.metric("Excluded Demand", f"{stats['excluded_demand']:,}", delta="Omitted")
175
+ col4.metric("Humanizer Staff Needed", stats['total_humanizer_needed'])
176
+
177
+ # ===== PRODUCT TYPE DISTRIBUTION =====
178
+ st.subheader("📈 Product Type Distribution")
179
+ if stats['type_counts']:
180
+ col1, col2 = st.columns(2)
181
+ with col1:
182
+ type_df = pd.DataFrame(list(stats['type_counts'].items()), columns=['Product Type', 'Count'])
183
+ st.bar_chart(type_df.set_index('Product Type'))
184
+ with col2:
185
+ for ptype, count in stats['type_counts'].items():
186
+ percentage = (count / stats['total_products']) * 100
187
+ st.write(f"**{ptype}:** {count} products ({percentage:.1f}%)")
188
+
189
+ # ===== DATA QUALITY ISSUES (for included products only) =====
190
+ st.subheader("⚠️ Data Quality Issues (Included Products)")
191
+ st.write("Issues affecting products that **will be** included in optimization:")
192
+ col1, col2, col3, col4 = st.columns(4)
193
+ col1.metric("No Line Assignment", stats['no_line_assignment'],
194
+ delta=None if stats['no_line_assignment'] == 0 else "Issue")
195
+ col2.metric("No Staffing Data", stats['no_staffing'],
196
+ delta=None if stats['no_staffing'] == 0 else "Issue")
197
+ col3.metric("No Speed Data", stats['no_speed'],
198
+ delta=None if stats['no_speed'] == 0 else "Will use default")
199
+ col4.metric("No Hierarchy Data", stats['no_hierarchy'],
200
+ delta=None if stats['no_hierarchy'] == 0 else "Issue")
201
+ col5.metric("Excluded: Too High Demand", stats['excluded_with_too_high_demand'],
202
+ delta=None if stats['excluded_with_too_high_demand'] == 0 else "Excluded")
203
+ # ===== INCLUDED PRODUCTS TABLE =====
204
+ included_df = validation_df[validation_df['Excluded from Optimization'] == False].copy()
205
+ excluded_df = validation_df[validation_df['Excluded from Optimization'] == True].copy()
206
+
207
+ st.subheader("✅ Products Included in Optimization")
208
+ st.write(f"**{len(included_df)} products** with total demand of **{included_df['Demand'].sum():,} units**")
209
+
210
+ if len(included_df) > 0:
211
+ # Filters
212
+ col1, col2 = st.columns(2)
213
+ type_filter = col1.selectbox("Filter by type", ["All"] + list(included_df['Product Type'].unique()), key="inc_filter")
214
+ min_demand = col2.number_input("Minimum demand", min_value=0, value=0, key="inc_demand")
215
+
216
+ # Apply filters
217
+ filtered = included_df.copy()
218
+ if type_filter != "All":
219
+ filtered = filtered[filtered['Product Type'] == type_filter]
220
+ if min_demand > 0:
221
+ filtered = filtered[filtered['Demand'] >= min_demand]
222
+
223
+ # Display table
224
+ display_cols = ['Product ID', 'Demand', 'Product Type', 'Line Type', 'UNICEF Staff',
225
+ 'Humanizer Staff', 'Production Speed (units/hour)', 'Data Quality Issues', 'Validation Status']
226
+ st.dataframe(filtered[display_cols], use_container_width=True, height=300)
227
+ else:
228
+ st.warning("No products are included in optimization!")
229
+
230
+ # ===== EXCLUDED PRODUCTS TABLE =====
231
+ st.subheader("🚫 Products Excluded from Optimization")
232
+ st.write(f"**{len(excluded_df)} products** with total demand of **{excluded_df['Demand'].sum():,} units**")
233
+ st.info("Excluded due to: missing line assignments, zero staffing, or non-standalone masters")
234
+
235
+ if len(excluded_df) > 0:
236
+ # Show exclusion breakdown
237
+ st.write("**Exclusion reasons:**")
238
+ for reason, count in excluded_df['Exclusion Reasons'].value_counts().items():
239
+ st.write(f"• {reason}: {count} products")
240
+
241
+ # Display table
242
+ display_cols = ['Product ID', 'Demand', 'Product Type', 'Exclusion Reasons',
243
+ 'UNICEF Staff', 'Humanizer Staff', 'Line Type']
244
+ st.dataframe(excluded_df[display_cols], use_container_width=True, height=200)
245
+
246
+ # Export button
247
+ st.download_button("📥 Export Validation Results to CSV", validation_df.to_csv(index=False),
+ file_name="demand_validation_results.csv", mime="text/csv")
250
+
251
+ # ===== RECOMMENDATIONS =====
252
+ st.subheader("💡 Recommendations")
253
+
254
+ if stats['excluded_products'] > 0:
255
+ st.warning(f"**{stats['excluded_products']} products** ({stats['excluded_demand']:,} units) excluded from optimization")
256
+
257
+ # Show data quality issues for included products
258
+ if stats['no_line_assignment'] > 0:
259
+ st.info(f"**Line Assignment**: {stats['no_line_assignment']} included products missing line assignments")
260
+ if stats['no_staffing'] > 0:
261
+ st.info(f"**Staffing Data**: {stats['no_staffing']} included products missing staffing requirements")
262
+ if stats['no_speed'] > 0:
263
+ st.info(f"**Speed Data**: {stats['no_speed']} included products missing speed data (will use default 106.7 units/hour)")
264
+ if stats['no_hierarchy'] > 0:
265
+ st.info(f"**Hierarchy Data**: {stats['no_hierarchy']} included products not in kit hierarchy")
266
+
267
+ # Overall status
268
+ if stats['included_products'] > 0:
269
+ st.success(f"✅ **{stats['included_products']} products** with {stats['included_demand']:,} units demand ready for optimization!")
270
+ if stats['no_speed'] == 0 and stats['no_hierarchy'] == 0:
271
+ st.info("🎉 All included products have complete data!")
272
+ else:
273
+ st.error("❌ No products passed filtering. Review exclusion reasons and check data configuration.")
274
+
275
+
276
+ if __name__ == "__main__":
277
+ # For testing
278
+ display_demand_validation()
src/models/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # This file makes the models directory a Python package
src/models/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (159 Bytes). View file
 
src/models/__pycache__/optimizer_real.cpython-310.pyc ADDED
Binary file (19.6 kB). View file
 
src/models/optimizer_real.py ADDED
@@ -0,0 +1,780 @@
1
+ # ============================================================
2
+ # SD_roster_real - Fixed Team Production Planning (Option A)
3
+ # - Uses config-style variable names from src/config/optimization_config.py
4
+ # - Team per product (simultaneous): UNICEF Fixed term / Humanizer
5
+ # - Line types via numeric ids: 6 = long line, 7 = mini load
6
+ # - One product per (line, shift, day)
7
+ # - Weekly demand (across DATE_SPAN)
8
+ # ============================================================
9
+
10
+ from ortools.linear_solver import pywraplp
11
+ from math import ceil
12
+ import datetime
13
+ # ---- config import ----
+ # Import constants and other project modules directly
+ from src.config.constants import ShiftType, LineType, KitLevel, DefaultConfig
18
+ import src.preprocess.extract as extract
19
+ from src.preprocess.hierarchy_parser import sort_products_by_hierarchy
20
+
21
+ class Optimizer:
22
+ """Workforce optimization class that handles all configuration and optimization logic"""
23
+
24
+ def __init__(self):
25
+ """Initialize optimizer with session state configuration"""
26
+ self.load_session_state_config()
27
+ self.load_data()
28
+
29
+ def load_session_state_config(self):
30
+ """Load all configuration from session state"""
31
+ import streamlit as st
32
+ import datetime as dt
33
+
34
+ # Date configuration
35
+ self.start_date = st.session_state.start_date
36
+ self.planning_days = st.session_state.planning_days
37
+ self.start_datetime = dt.datetime.combine(self.start_date, dt.datetime.min.time())
38
+ self.end_date = self.start_datetime + dt.timedelta(days=self.planning_days - 1)
39
+ self.date_span = list(range(1, self.planning_days + 1))
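+ # Example: planning_days = 5 gives date_span = [1, 2, 3, 4, 5], i.e. day indices relative to start_date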
40
+
41
+ # Employee and shift configuration
42
+ self.employee_type_list = list(st.session_state.selected_employee_types)
43
+ self.active_shift_list = sorted(list(st.session_state.selected_shifts))
44
+
45
+ print("\n[DEBUG] From session_state.selected_employee_types:")
46
+ for emp in self.employee_type_list:
47
+ print(f" - '{emp}' (len={len(emp)}, repr={repr(emp)})")
48
+
49
+ # Working hours configuration
50
+ self.max_hour_per_person_per_day = st.session_state.max_hour_per_person_per_day
51
+ self.max_hours_shift = {
52
+ ShiftType.REGULAR: st.session_state.max_hours_shift_1,
53
+ ShiftType.EVENING: st.session_state.max_hours_shift_2,
54
+ ShiftType.OVERTIME: st.session_state.max_hours_shift_3
55
+ }
56
+
57
+ # Workforce limits
58
+ self.max_employee_per_type_on_day = st.session_state.max_employee_per_type_on_day
59
+
60
+ # Operations configuration
61
+ self.line_counts = st.session_state.line_counts
62
+ self.max_parallel_workers = {
63
+ LineType.LONG_LINE: st.session_state.max_parallel_workers_long_line,
64
+ LineType.MINI_LOAD: st.session_state.max_parallel_workers_mini_load
65
+ }
66
+
67
+ # Cost configuration
68
+ self.cost_list_per_emp_shift = st.session_state.cost_list_per_emp_shift
69
+
70
+ # Payment mode configuration
71
+ self.payment_mode_config = st.session_state.payment_mode_config
72
+
73
+ # Fixed staffing requirements
74
+ self.fixed_min_unicef_per_day = st.session_state.fixed_min_unicef_per_day
75
+
76
+ print("✅ Session state configuration loaded successfully")
77
+
78
+ def load_data(self):
79
+ """Load all required data from files"""
80
+ # Load hierarchy data
81
+ try:
82
+ kit_levels, dependencies, priority_order = extract.get_production_order_data()
83
+ self.kit_levels = kit_levels
84
+ self.kit_dependencies = dependencies
85
+ self.production_priority_order = priority_order
86
+ except Exception:
87
+ self.kit_levels = {}
88
+ self.kit_dependencies = {}
89
+ self.production_priority_order = []
90
+
91
+ # Load kit line match data
92
+ try:
93
+ kit_line_match = extract.read_kit_line_match_data()
94
+ kit_line_match_dict = kit_line_match.set_index("kit_name")["line_type"].to_dict()
95
+
96
+ # Create line name to ID mapping
97
+ line_name_to_id = {
98
+ "long line": LineType.LONG_LINE,
99
+ "mini load": LineType.MINI_LOAD
100
+ }
101
+
102
+ # Convert line names to IDs
103
+ self.kit_line_match_dict = {}
104
+ for kit_name, line_name in kit_line_match_dict.items():
105
+ self.kit_line_match_dict[kit_name] = line_name_to_id.get(line_name.lower(), line_name)
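+ # e.g. a mapping entry {'<kit>': 'long line'} becomes {'<kit>': 6}; unmapped line names are passed through unchanged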
106
+ except Exception:
107
+ self.kit_line_match_dict = {}
108
+
109
+ # Load product and demand data
110
+ try:
111
+ from src.demand_filtering import DemandFilter
112
+ filter_instance = DemandFilter()
113
+ filter_instance.load_data(force_reload=True)
114
+ self.product_list = filter_instance.get_filtered_product_list()
115
+ self.demand_dictionary = filter_instance.get_filtered_demand_dictionary()
116
+ except Exception:
117
+ self.product_list = []
118
+ self.demand_dictionary = {}
119
+
120
+ # Load team requirements
121
+ try:
122
+ print("\n[DEBUG] Loading team requirements from Kits Calculation...")
123
+ kits_df = extract.read_personnel_requirement_data()
124
+ print(f"[DEBUG] Loaded kits_df with {len(kits_df)} rows")
125
+ print(f"[DEBUG] Columns: {list(kits_df.columns)}")
126
+
127
+ # Initialize team requirements dictionary
128
+ self.team_req_per_product = {
129
+ "UNICEF Fixed term": {},
130
+ "Humanizer": {}
131
+ }
132
+
133
+ # Process each product in the product list
134
+ for product in self.product_list:
135
+ product_data = kits_df[kits_df['Kit'] == product]
136
+ if not product_data.empty:
137
+ # Extract Humanizer and UNICEF staff requirements
138
+ humanizer_req = product_data["Humanizer"].iloc[0]
139
+ unicef_req = product_data["UNICEF staff"].iloc[0]
140
+
141
+ # Convert to int (data is already cleaned in extract function)
142
+ self.team_req_per_product["Humanizer"][product] = int(humanizer_req)
143
+ self.team_req_per_product["UNICEF Fixed term"][product] = int(unicef_req)
144
+ else:
145
+ print(f"[WARN] Product {product} not found in Kits Calculation, setting requirements to 0")
146
+ self.team_req_per_product["Humanizer"][product] = 0
147
+ self.team_req_per_product["UNICEF Fixed term"][product] = 0
148
+
149
+ print(f"\n[DEBUG] team_req_per_product keys after loading:")
150
+ for key in self.team_req_per_product.keys():
151
+ product_count = len(self.team_req_per_product[key])
152
+ print(f" - '{key}' (len={len(key)}, {product_count} products)")
153
+
154
+ except Exception as e:
155
+ print(f"[ERROR] Failed to load team requirements: {e}")
156
+ import traceback
157
+ traceback.print_exc()
158
+ self.team_req_per_product = {}
159
+
160
+ # Load product speed data
161
+ try:
162
+ self.per_product_speed = extract.read_package_speed_data()
163
+ except Exception:
164
+ self.per_product_speed = {}
165
+
166
+ print("✅ All data loaded successfully")
167
+
168
+ def build_lines(self):
169
+ """Build line instances from session state configuration"""
170
+ line_tuples = []
171
+
172
+ try:
173
+ import streamlit as st
174
+ # Get selected line types from Data Selection tab
175
+ selected_lines = st.session_state.selected_lines
176
+ # Get line counts from Operations tab
177
+ line_counts = st.session_state.line_counts
178
+
179
+ print(f"Using lines from session state - selected: {selected_lines}, counts: {line_counts}")
180
+ for line_type in selected_lines:
181
+ count = line_counts.get(line_type, 0)
182
+ for i in range(1, count + 1):
183
+ line_tuples.append((line_type, i))
184
+
185
+ return line_tuples
186
+
187
+ except Exception as e:
188
+ print(f"Could not get line config from session state: {e}")
189
+ # Fallback: Use default values
190
+ print("Falling back to default line configuration")
191
+ default_selected_lines = [LineType.LONG_LINE, LineType.MINI_LOAD]
192
+ default_line_counts = {
193
+ LineType.LONG_LINE: DefaultConfig.LINE_COUNT_LONG_LINE,
194
+ LineType.MINI_LOAD: DefaultConfig.LINE_COUNT_MINI_LOAD
195
+ }
196
+
197
+ for line_type in default_selected_lines:
198
+ count = default_line_counts.get(line_type, 0)
199
+ for i in range(1, count + 1):
200
+ line_tuples.append((line_type, i))
201
+
202
+ return line_tuples
203
+
204
+ def run_optimization(self):
205
+ """Run the main optimization algorithm"""
206
+ # *** CRITICAL: Load fresh data to reflect current Streamlit configs ***
207
+ print("\n" + "="*60)
208
+ print("🔄 LOADING FRESH DATA FOR OPTIMIZATION")
209
+ print("="*60)
210
+
211
+ print(f"📦 LOADED PRODUCTS: {len(self.product_list)} products")
212
+ print(f"📈 LOADED DEMAND: {sum(self.demand_dictionary.values())} total units")
213
+ print(f"👥 LOADED TEAM REQUIREMENTS: {len(self.team_req_per_product)} employee types")
214
+
215
+ # Debug: Print team requirements keys
216
+ print("\n[DEBUG] team_req_per_product employee types:")
217
+ for emp_type in self.team_req_per_product.keys():
218
+ print(f" - '{emp_type}'")
219
+
220
+ print("\n[DEBUG] self.employee_type_list:")
221
+ for emp_type in self.employee_type_list:
222
+ print(f" - '{emp_type}'")
223
+
224
+ # Build ACTIVE schedule for fresh product list
225
+ ACTIVE = {t: {p: 1 for p in self.product_list} for t in self.date_span}
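+ # ACTIVE[t][p] = 1 marks product p as schedulable on day t; here every product is active on every day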
226
+
227
+ # --- Sets ---
228
+ date_span_list = list(self.date_span)
229
+ employee_type_list = self.employee_type_list
230
+ active_shift_list = self.active_shift_list
231
+ print(f"\n[DEBUG] employee_type_list: {employee_type_list}")
232
+ print(f"[DEBUG] active_shift_list: {active_shift_list}")
233
+
234
+ # *** HIERARCHY SORTING: Sort products by production priority ***
235
+ print("\n" + "="*60)
236
+ print("🔗 APPLYING HIERARCHY-BASED PRODUCTION ORDERING")
237
+ print("="*60)
238
+ sorted_product_list = sort_products_by_hierarchy(list(self.product_list), self.kit_levels, self.kit_dependencies)
239
+
240
+ line_tuples = self.build_lines()
241
+ print("Lines", line_tuples)
242
+
243
+ print("PER_PRODUCT_SPEED", self.per_product_speed)
244
+
245
+ # --- Short aliases for parameters ---
246
+ print("\n[DEBUG] Creating variable aliases...")
247
+ Hmax_s = dict(self.max_hours_shift) # per-shift hours
248
+ Hmax_daily = self.max_hour_per_person_per_day
249
+ max_workers_line = dict(self.max_parallel_workers) # per line type
250
+ max_employee_type_day = self.max_employee_per_type_on_day # {emp_type:{t:headcount}}
251
+ cost = self.cost_list_per_emp_shift # {emp_type:{shift:cost}}
252
+
253
+ # Create aliases for data dictionaries
254
+ TEAM_REQ_PER_PRODUCT = self.team_req_per_product
255
+ DEMAND_DICTIONARY = self.demand_dictionary
256
+ KIT_LINE_MATCH_DICT = self.kit_line_match_dict
257
+ KIT_LEVELS = self.kit_levels
258
+ KIT_DEPENDENCIES = self.kit_dependencies
259
+ PER_PRODUCT_SPEED = self.per_product_speed
260
+ FIXED_MIN_UNICEF_PER_DAY = self.fixed_min_unicef_per_day
261
+ PAYMENT_MODE_CONFIG = self.payment_mode_config
262
+
263
+ # Mock missing config variables (if they exist in config, they'll be overridden)
264
+ EVENING_SHIFT_MODE = "normal"
265
+ EVENING_SHIFT_DEMAND_THRESHOLD = 0.9
266
+
267
+ print(f"[DEBUG] TEAM_REQ_PER_PRODUCT has {len(TEAM_REQ_PER_PRODUCT)} employee types")
268
+ print(f"[DEBUG] employee_type_list has {len(employee_type_list)} types")
269
+
270
+ # --- Feasibility quick checks ---
271
+ print("\n[DEBUG] Starting feasibility checks...")
272
+
273
+ # 1) If team size is greater than max_workers_line, block the product-line type combination
274
+ for i, p in enumerate(sorted_product_list):
275
+ print(f"[DEBUG] Checking product {i+1}/{len(sorted_product_list)}: {p}")
276
+
277
+ # Check if all employee types exist in TEAM_REQ_PER_PRODUCT
278
+ for e in employee_type_list:
279
+ if e not in TEAM_REQ_PER_PRODUCT:
280
+ print(f"[ERROR] Employee type '{e}' not found in TEAM_REQ_PER_PRODUCT!")
281
+ print(f"[ERROR] Available keys: {list(TEAM_REQ_PER_PRODUCT.keys())}")
282
+ raise KeyError(f"Employee type '{e}' not in team requirements data")
283
+ if p not in TEAM_REQ_PER_PRODUCT[e]:
284
+ print(f"[ERROR] Product '{p}' not found in TEAM_REQ_PER_PRODUCT['{e}']!")
285
+ raise KeyError(f"Product '{p}' not in team requirements for employee type '{e}'")
286
+
287
+ req_total = sum(TEAM_REQ_PER_PRODUCT[e][p] for e in employee_type_list)
288
+ print(f"[DEBUG] req_total: {req_total}")
289
+ lt = KIT_LINE_MATCH_DICT.get(p, LineType.LONG_LINE) # Default to long line (6) if not found
290
+ if p not in KIT_LINE_MATCH_DICT:
291
+ print(f"[WARN] Product {p}: No line type mapping found, defaulting to long line (6)")
292
+ if req_total > max_workers_line.get(lt, 1e9):
293
+ print(f"[WARN] Product {p}: team size {req_total} > MAX_PARALLEL_WORKERS[{lt}] "
294
+ f"= {max_workers_line.get(lt)}. Blocked.")
295
+
296
+ # 2) Check if demand can be met without evening shift (only if in normal mode)
297
+ if EVENING_SHIFT_MODE == "normal":
298
+ total_demand = sum(DEMAND_DICTIONARY.get(p, 0) for p in sorted_product_list)
299
+
300
+ # Calculate maximum capacity with regular + overtime shifts only
301
+ regular_overtime_shifts = [s for s in active_shift_list if s in ShiftType.REGULAR_AND_OVERTIME]
302
+ max_capacity = 0
303
+
304
+ for p in sorted_product_list:
305
+ if p in PER_PRODUCT_SPEED:
306
+ product_speed = PER_PRODUCT_SPEED[p] # units per hour
307
+ # Calculate max hours available for this product across all lines and shifts
308
+ max_hours_per_product = 0
309
+ for ell in line_tuples:
310
+ for s in regular_overtime_shifts:
311
+ for t in date_span_list:
312
+ max_hours_per_product += Hmax_s[s]
313
+
314
+ max_capacity += product_speed * max_hours_per_product
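+ # Note: this is an optimistic upper bound - it assumes every line/shift/day could run this product at full speed, ignoring staffing limits and the one-product-per-line rule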
315
+
316
+ capacity_ratio = max_capacity / total_demand if total_demand > 0 else float('inf')
317
+
318
+ print(f"[CAPACITY CHECK] Total demand: {total_demand}")
319
+ print(f"[CAPACITY CHECK] Max capacity (Regular + Overtime): {max_capacity:.1f}")
320
+ print(f"[CAPACITY CHECK] Capacity ratio: {capacity_ratio:.2f}")
321
+
322
+ if capacity_ratio < EVENING_SHIFT_DEMAND_THRESHOLD:
323
+ print(f"\n🚨 [ALERT] DEMAND TOO HIGH!")
324
+ print(f" Current capacity can only meet {capacity_ratio*100:.1f}% of demand")
325
+ print(f" Threshold: {EVENING_SHIFT_DEMAND_THRESHOLD*100:.1f}%")
326
+ print(f" RECOMMENDATION: Change EVENING_SHIFT_MODE to 'activate_evening' to enable evening shift")
327
+ print(f" This will add shift 3 to increase capacity\n")
328
+
329
+
330
+ # --- Solver ---
331
+ solver = pywraplp.Solver.CreateSolver('CBC')
332
+ if not solver:
333
+ raise RuntimeError("CBC solver not found.")
334
+ INF = solver.infinity()
335
+
336
+ # --- Variables ---
337
+ # Assignment[p,ell,s,t] ∈ {0,1}: 1 if product p runs on (line,shift,day)
338
+ Assignment, Hours, Units = {}, {}, {} # Hours: run hours, Units: production units
339
+ for p in sorted_product_list:
340
+ for ell in line_tuples: # ell = (line_type_id, idx)
341
+ for s in active_shift_list:
342
+ for t in date_span_list:
343
+ #Is product p assigned to run on line ell, during shift s, on day t?
344
+ Assignment[p, ell, s, t] = solver.BoolVar(f"Z_{p}_{ell[0]}_{ell[1]}_s{s}_d{t}")
345
+ #How many hours does product p run on line ell, during shift s, on day t?
346
+ Hours[p, ell, s, t] = solver.NumVar(0, Hmax_s[s], f"T_{p}_{ell[0]}_{ell[1]}_s{s}_d{t}")
347
+ #How many units does product p run on line ell, during shift s, on day t?
348
+ Units[p, ell, s, t] = solver.NumVar(0, INF, f"U_{p}_{ell[0]}_{ell[1]}_s{s}_d{t}")
349
+
350
+ # Note: IDLE variables removed - we only track employees actually working on production
351
+
352
+ # Variable to track actual number of employees of each type working each shift each day
353
+ # This represents how many distinct employees of type e are working in shift s on day t
354
+ EMPLOYEE_COUNT = {}
355
+ for e in employee_type_list:
356
+ for s in active_shift_list:
357
+ for t in date_span_list:
358
+ # Note: Minimum staffing is per day, not per shift
359
+ # We'll handle the daily minimum constraint separately
360
+ max_count = max_employee_type_day.get(e, {}).get(t, 100)
361
+ EMPLOYEE_COUNT[e, s, t] = solver.IntVar(
362
+ 0, # No minimum per shift (daily minimum handled separately)
363
+ max_count,
364
+ f"EmpCount_{e}_s{s}_day{t}"
365
+ )
366
+
367
+ # Track total person-hours worked by each employee type per shift per day
368
+ # This is needed for employee-centric wage calculation
369
+ EMPLOYEE_HOURS = {}
370
+ for e in employee_type_list:
371
+ for s in active_shift_list:
372
+ for t in date_span_list:
373
+ # Sum of all work hours for employee type e in shift s on day t
374
+ # This represents total person-hours (e.g., 5 employees × 8 hours = 40 person-hours)
375
+ EMPLOYEE_HOURS[e, s, t] = solver.Sum(
376
+ TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t]
377
+ for p in sorted_product_list
378
+ for ell in line_tuples
379
+ )
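+ # Example: a product needing 2 UNICEF staff that runs 6 hours contributes 12 person-hours to this sum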
380
+
381
+ # Note: Binary variables for bulk payment are now created inline in the cost calculation
382
+
383
+ # --- Objective: Minimize total labor cost (wages) ---
384
+ # Employee-centric approach: calculate wages based on actual employees and their hours
385
+ print(f"\n[DEBUG] Payment mode configuration: {PAYMENT_MODE_CONFIG}")
386
+
387
+ # Build cost terms based on payment mode
388
+ cost_terms = []
389
+
390
+ for e in employee_type_list:
391
+ for s in active_shift_list:
392
+ for t in date_span_list:
393
+ payment_mode = PAYMENT_MODE_CONFIG.get(s, "partial") # Default to partial if not specified
394
+
395
+ if payment_mode == "partial":
396
+ # Partial payment: pay for actual person-hours worked
397
+ # Cost = hourly_rate × total_person_hours
398
+ # Example: $20/hr × 40 person-hours = $800
399
+ cost_terms.append(cost[e][s] * EMPLOYEE_HOURS[e, s, t])
400
+
401
+ elif payment_mode == "bulk":
402
+ # Bulk payment: if ANY work happens in shift, pay ALL working employees for FULL shift
403
+ # We need to know: did employee type e work at all in shift s on day t?
404
+
405
+ # Create binary: 1 if employee type e worked in this shift
406
+ work_in_shift = solver.BoolVar(f"work_{e}_s{s}_d{t}")
407
+
408
+ # Link binary to work hours
409
+ # If EMPLOYEE_HOURS > 0, then work_in_shift = 1
410
+ # If EMPLOYEE_HOURS = 0, then work_in_shift = 0
411
+ max_possible_hours = Hmax_s[s] * max_employee_type_day[e][t]
412
+ solver.Add(EMPLOYEE_HOURS[e, s, t] <= max_possible_hours * work_in_shift)
413
+ solver.Add(work_in_shift * 0.001 <= EMPLOYEE_HOURS[e, s, t])
414
+
415
+ # Calculate number of employees working in this shift
416
+ # This is approximately: ceil(EMPLOYEE_HOURS / Hmax_s[s])
417
+ # But we can use: employees_working_in_shift
418
+ # For simplicity, use EMPLOYEE_HOURS / Hmax_s[s] as continuous approximation
419
+ # Or better: create a variable for employees per shift
420
+
421
+ # Simpler approach: For bulk payment, assume if work happens,
422
+ # we need approximately EMPLOYEE_HOURS/Hmax_s[s] employees,
423
+ # and each gets paid for full shift
424
+ # Cost ≈ (EMPLOYEE_HOURS / Hmax_s[s]) × Hmax_s[s] × hourly_rate = EMPLOYEE_HOURS × hourly_rate
425
+ # But that's the same as partial! The difference is we round up employees.
426
+
427
+ # Better approach: Create variable for employees working in this specific shift
428
+ employees_in_shift = solver.IntVar(0, max_employee_type_day[e][t], f"emp_{e}_s{s}_d{t}")
429
+
430
+ # Link employees_in_shift to work requirements
431
+ # If EMPLOYEE_HOURS requires N employees, then employees_in_shift >= ceil(N)
432
+ solver.Add(employees_in_shift * Hmax_s[s] >= EMPLOYEE_HOURS[e, s, t])
433
+
434
+ # Cost: pay each employee for full shift
435
+ cost_terms.append(cost[e][s] * Hmax_s[s] * employees_in_shift)
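+ # Illustrative numbers: 18 person-hours with Hmax_s[s] = 7.5 forces employees_in_shift >= 3 (18/7.5 = 2.4, rounded up by integrality), so the bulk cost is rate x 7.5 x 3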
436
+
437
+ # Note: No idle employee costs - only pay for employees actually working
438
+
439
+ total_cost = solver.Sum(cost_terms)
440
+
441
+ # Objective: minimize total labor cost (wages)
442
+ # This finds the optimal production schedule (product order, line assignment, timing)
443
+ # that minimizes total wages while meeting all demand and capacity constraints
444
+ solver.Minimize(total_cost)
445
+
446
+ # --- Constraints ---
447
+
448
+ # 1) Weekly demand - must meet exactly (no over/under production)
449
+ for p in sorted_product_list:
450
+ total_production = solver.Sum(Units[p, ell, s, t] for ell in line_tuples for s in active_shift_list for t in date_span_list)
451
+ demand = DEMAND_DICTIONARY.get(p, 0)
452
+
453
+ # Must produce at least the demand
454
+ solver.Add(total_production >= demand)
455
+
456
+ # Must not produce more than the demand (prevent overproduction)
457
+ solver.Add(total_production <= demand)
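+ # Together the two inequalities pin total_production == demand exactly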
458
+
459
+ # 2) One product per (line,shift,day) + time gating
460
+ for ell in line_tuples:
461
+ for s in active_shift_list:
462
+ for t in date_span_list:
463
+ solver.Add(solver.Sum(Assignment[p, ell, s, t] for p in sorted_product_list) <= 1)
464
+ for p in sorted_product_list:
465
+ solver.Add(Hours[p, ell, s, t] <= Hmax_s[s] * Assignment[p, ell, s, t])
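+ # If Assignment is 0 the run hours collapse to 0, so only the single assigned product can consume time on that (line, shift, day)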
466
+
467
+ # 3) Product-line type compatibility + (optional) activity by day
468
+ for p in sorted_product_list:
469
+ req_lt = KIT_LINE_MATCH_DICT.get(p, LineType.LONG_LINE) # Default to long line if not found
470
+ req_total = sum(TEAM_REQ_PER_PRODUCT[e][p] for e in employee_type_list)
471
+ for ell in line_tuples:
472
+ allowed = (ell[0] == req_lt) and (req_total <= max_workers_line.get(ell[0], 1e9))
473
+ for s in active_shift_list:
474
+ for t in date_span_list:
475
+ if ACTIVE[t][p] == 0 or not allowed:
476
+ solver.Add(Assignment[p, ell, s, t] == 0)
477
+ solver.Add(Hours[p, ell, s, t] == 0)
478
+ solver.Add(Units[p, ell, s, t] == 0)
479
+
480
+ # 4) Line throughput: Units ≤ product_speed * Hours
481
+ for p in sorted_product_list:
482
+ for ell in line_tuples:
483
+ for s in active_shift_list:
484
+ for t in date_span_list:
485
+ # Get product speed (same speed regardless of line type)
486
+ if p in PER_PRODUCT_SPEED:
487
+ # PER_PRODUCT_SPEED already holds units per hour (kits per day / paid work hours per day)
488
+ speed = PER_PRODUCT_SPEED[p]
489
+ # Upper bound: units cannot exceed capacity
490
+ solver.Add(
491
+ Units[p, ell, s, t] <= speed * Hours[p, ell, s, t]
492
+ )
493
+ # Lower bound: if working, must produce (prevent phantom work)
494
+ solver.Add(
495
+ Units[p, ell, s, t] >= speed * Hours[p, ell, s, t]
496
+ )
497
+ else:
498
+ # Default speed if not found
499
+ default_speed = 800 / 7.5 # units per hour
500
+ print(f"Warning: No speed data for product {p}, using default {default_speed:.1f} per hour")
501
+ # Upper bound: units cannot exceed capacity
502
+ solver.Add(
503
+ Units[p, ell, s, t] <= default_speed * Hours[p, ell, s, t]
504
+ )
505
+ # Lower bound: if working, must produce (prevent phantom work)
506
+ solver.Add(
507
+ Units[p, ell, s, t] >= default_speed * Hours[p, ell, s, t]
508
+ )
509
+
510
+ # Working hours constraint: active employees cannot exceed shift hour capacity
511
+ for e in employee_type_list:
512
+ for s in active_shift_list:
513
+ for t in date_span_list:
514
+ # No idle employee constraints - employees are only counted when working
515
+ solver.Add(
516
+ solver.Sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t] for p in sorted_product_list for ell in line_tuples)
517
+ <= Hmax_s[s] * max_employee_type_day[e][t]
518
+ )
519
+
520
+ # 6) Per-shift staffing capacity by type: link employee count to actual work hours
521
+ # This constraint ensures EMPLOYEE_COUNT[e,s,t] represents the actual number of employees needed in each shift
522
+ for e in employee_type_list:
523
+ for s in active_shift_list:
524
+ for t in date_span_list:
525
+ # Total person-hours worked by employee type e in shift s on day t
526
+ total_person_hours_in_shift = solver.Sum(
527
+ TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t]
528
+ for p in sorted_product_list
529
+ for ell in line_tuples
530
+ )
531
+
532
+ # Employee count must be sufficient to cover the work in this shift
533
+ # If employees work H person-hours total and each can work max M hours/shift,
534
+ # then we need at least ceil(H/M) employees
535
+ # Constraint: employee_count × max_hours_per_shift >= total_person_hours_in_shift
536
+ solver.Add(EMPLOYEE_COUNT[e, s, t] * Hmax_s[s] >= total_person_hours_in_shift)
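+ # Example: 20 person-hours in a 7.5 h shift forces EMPLOYEE_COUNT >= 20/7.5 = 2.67, i.e. at least 3 since the variable is integer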
537
+
538
+ # 7) Shift ordering constraints (only apply if shifts are available)
539
+ # Evening shift after regular shift
540
+ if ShiftType.EVENING in active_shift_list and ShiftType.REGULAR in active_shift_list: # Only if both shifts are available
541
+ for e in employee_type_list:
542
+ for t in date_span_list:
543
+ solver.Add(
544
+ solver.Sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, ShiftType.EVENING, t] for p in sorted_product_list for ell in line_tuples)
545
+ <=
546
+ solver.Sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, ShiftType.REGULAR, t] for p in sorted_product_list for ell in line_tuples)
547
+ )
548
+
549
+ # Overtime should only be used when regular shift is at capacity
550
+ if ShiftType.OVERTIME in active_shift_list and ShiftType.REGULAR in active_shift_list: # Only if both shifts are available
551
+ print("\n[OVERTIME] Adding constraints to ensure overtime only when regular shift is insufficient...")
552
+
553
+ for e in employee_type_list:
554
+ for t in date_span_list:
555
+ # Get available regular capacity for this employee type and day
556
+ regular_capacity = max_employee_type_day[e][t]
557
+
558
+ # Total regular shift usage for this employee type and day
559
+ regular_usage = solver.Sum(
560
+ TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, ShiftType.REGULAR, t]
561
+ for p in sorted_product_list for ell in line_tuples
562
+ )
563
+
564
+ # Total overtime usage for this employee type and day
565
+ overtime_usage = solver.Sum(
566
+ TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, ShiftType.OVERTIME, t]
567
+ for p in sorted_product_list for ell in line_tuples
568
+ )
569
+
570
+ # Create binary variable: 1 if using overtime, 0 otherwise
571
+ using_overtime = solver.IntVar(0, 1, f'using_overtime_{e}_{t}')
572
+
573
+ # If using overtime, regular capacity must be utilized significantly.
+ # Note: regular_usage and overtime_usage are in person-hours, so the
+ # 90% threshold is expressed in person-hours (headcount x shift hours).
+ min_regular_for_overtime = 0.9 * regular_capacity * Hmax_s[ShiftType.REGULAR]
+
+ # Constraint 1: Can only use overtime if regular usage is high
+ solver.Add(regular_usage >= min_regular_for_overtime * using_overtime)
+
+ # Constraint 2: If any overtime is used, force the binary on (big-M link)
+ solver.Add(overtime_usage <= regular_capacity * Hmax_s[ShiftType.OVERTIME] * using_overtime)
582
+
583
+ overtime_constraints_added = len(employee_type_list) * len(date_span_list) * 2 # 2 constraints per employee type per day
584
+ print(f"[OVERTIME] Added {overtime_constraints_added} constraints ensuring overtime only when regular shifts are at 90%+ capacity")
585
+
586
+ # 7.5) Bulk payment linking constraints are now handled inline in the cost calculation
587
+
588
+ # 7.6) *** FIXED MINIMUM UNICEF EMPLOYEES CONSTRAINT ***
589
+ # Ensure minimum UNICEF fixed-term staff work in the REGULAR shift every day
590
+ # The minimum applies to the regular shift specifically (not overtime or evening)
591
+ if 'UNICEF Fixed term' in employee_type_list and FIXED_MIN_UNICEF_PER_DAY > 0:
592
+ if ShiftType.REGULAR in active_shift_list:
593
+ print(f"\n[FIXED STAFFING] Adding constraint for minimum {FIXED_MIN_UNICEF_PER_DAY} UNICEF employees in REGULAR shift per day...")
594
+ for t in date_span_list:
595
+ # At least FIXED_MIN_UNICEF_PER_DAY employees must work in the regular shift each day
596
+ solver.Add(
597
+ EMPLOYEE_COUNT['UNICEF Fixed term', ShiftType.REGULAR, t] >= FIXED_MIN_UNICEF_PER_DAY
598
+ )
599
+ print(f"[FIXED STAFFING] Added {len(date_span_list)} constraints ensuring >= {FIXED_MIN_UNICEF_PER_DAY} UNICEF employees in regular shift per day")
600
+ else:
601
+ print(f"\n[FIXED STAFFING] Warning: Regular shift not available, cannot enforce minimum UNICEF staffing")
602
+
603
+ # 8) *** HIERARCHY DEPENDENCY CONSTRAINTS ***
604
+ # For subkits with prepack dependencies: dependencies should be produced before or same time
605
+ print("\n[HIERARCHY] Adding dependency constraints...")
606
+ dependency_constraints_added = 0
607
+
608
+ for p in sorted_product_list:
609
+ dependencies = KIT_DEPENDENCIES.get(p, [])
610
+ if dependencies:
611
+ # Get the level of the current product
612
+ p_level = KIT_LEVELS.get(p, 2)
613
+
614
+ for dep in dependencies:
615
+ if dep in sorted_product_list: # Only if dependency is also in production list
616
+ # Calculate "completion time" for each product (sum of all production times)
617
+ p_completion = solver.Sum(
618
+ t * Hours[p, ell, s, t] for ell in line_tuples for s in active_shift_list for t in date_span_list
619
+ )
620
+ dep_completion = solver.Sum(
621
+ t * Hours[dep, ell, s, t] for ell in line_tuples for s in active_shift_list for t in date_span_list
622
+ )
623
+
624
+ # Dependency should complete before or at the same time
625
+ solver.Add(dep_completion <= p_completion)
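+ # Note: sum(t * Hours) is an hour-weighted day index, so this orders the dependency's production no later than its parent's on average rather than enforcing strict day-by-day precedence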
626
+ dependency_constraints_added += 1
627
+
628
+ print(f" Added constraint: {dep} (dependency) <= {p} (level {p_level})")
629
+
630
+ print(f"[HIERARCHY] Added {dependency_constraints_added} dependency constraints")
631
+
632
+ # --- Solve ---
633
+ status = solver.Solve()
634
+ if status != pywraplp.Solver.OPTIMAL:
635
+ status_names = {pywraplp.Solver.INFEASIBLE: "INFEASIBLE", pywraplp.Solver.UNBOUNDED: "UNBOUNDED"}
636
+ print(f"No optimal solution. Status: {status} ({status_names.get(status, 'UNKNOWN')})")
637
+ # Debug hint:
638
+ # solver.EnableOutput()
639
+ # solver.ExportModelAsLpFile("model.lp")
640
+ return None
641
+
642
+ # --- Report ---
643
+ result = {}
644
+ result['objective'] = solver.Objective().Value()
645
+
646
+ # Weekly production
647
+ prod_week = {p: sum(Units[p, ell, s, t].solution_value() for ell in line_tuples for s in active_shift_list for t in date_span_list) for p in sorted_product_list}
648
+ result['weekly_production'] = prod_week
649
+
650
+ # Which product ran on which line/shift/day
651
+ schedule = []
652
+ for t in date_span_list:
653
+ for ell in line_tuples:
654
+ for s in active_shift_list:
655
+ chosen = [p for p in sorted_product_list if Assignment[p, ell, s, t].solution_value() > 0.5]
656
+ if chosen:
657
+ p = chosen[0]
658
+ schedule.append({
659
+ 'day': t,
660
+ 'line_type_id': ell[0],
661
+ 'line_idx': ell[1],
662
+ 'shift': s,
663
+ 'product': p,
664
+ 'run_hours': Hours[p, ell, s, t].solution_value(),
665
+ 'units': Units[p, ell, s, t].solution_value(),
666
+ })
667
+ result['run_schedule'] = schedule
668
+
669
+ # Implied headcount by type/shift/day (ceil)
670
+ headcount = []
671
+ for e in employee_type_list:
672
+ for s in active_shift_list:
673
+ for t in date_span_list:
674
+ used_ph = sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t].solution_value() for p in sorted_product_list for ell in line_tuples)
675
+ need = ceil(used_ph / (Hmax_s[s] + 1e-9))
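+ # The 1e-9 keeps ceil() from rounding up when used_ph is an exact multiple of the shift length (floating-point noise)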
676
+ headcount.append({'emp_type': e, 'shift': s, 'day': t,
677
+ 'needed': need, 'available': max_employee_type_day[e][t]})
678
+ result['headcount_per_shift'] = headcount
679
+
680
+ # Total person-hours by type/day (≤ 14h * headcount)
681
+ ph_by_day = []
682
+ for e in employee_type_list:
683
+ for t in date_span_list:
684
+ used = sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t].solution_value() for s in active_shift_list for p in sorted_product_list for ell in line_tuples)
685
+ ph_by_day.append({'emp_type': e, 'day': t,
686
+ 'used_person_hours': used,
687
+ 'cap_person_hours': Hmax_daily * max_employee_type_day[e][t]})
688
+ result['person_hours_by_day'] = ph_by_day
689
+
690
+ # Actual employee count per type/shift/day (from EMPLOYEE_COUNT variable)
691
+ employee_count_by_shift = []
692
+ for e in employee_type_list:
693
+ for s in active_shift_list:
694
+ for t in date_span_list:
695
+ count = int(EMPLOYEE_COUNT[e, s, t].solution_value())
696
+ used_hours = sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t].solution_value()
697
+ for p in sorted_product_list for ell in line_tuples)
698
+ avg_hours_per_employee = used_hours / count if count > 0 else 0
699
+ if count > 0: # Only add entries where employees are working
700
+ employee_count_by_shift.append({
701
+ 'emp_type': e,
702
+ 'shift': s,
703
+ 'day': t,
704
+ 'employee_count': count,
705
+ 'total_person_hours': used_hours,
706
+ 'avg_hours_per_employee': avg_hours_per_employee,
707
+ 'available': max_employee_type_day[e][t]
708
+ })
709
+ result['employee_count_by_shift'] = employee_count_by_shift
710
+
711
+ # Also calculate daily totals (summing across shifts)
712
+ employee_count_by_day = []
713
+ for e in employee_type_list:
714
+ for t in date_span_list:
715
+ # Sum employees across all shifts for this day
716
+ total_count = sum(int(EMPLOYEE_COUNT[e, s, t].solution_value()) for s in active_shift_list)
717
+ used_hours = sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t].solution_value()
718
+ for s in active_shift_list for p in sorted_product_list for ell in line_tuples)
719
+ avg_hours_per_employee = used_hours / total_count if total_count > 0 else 0
720
+ if total_count > 0: # Only add days where employees are working
721
+ employee_count_by_day.append({
722
+ 'emp_type': e,
723
+ 'day': t,
724
+ 'employee_count': total_count,
725
+ 'total_person_hours': used_hours,
726
+ 'avg_hours_per_employee': avg_hours_per_employee,
727
+ 'available': max_employee_type_day[e][t]
728
+ })
729
+ result['employee_count_by_day'] = employee_count_by_day
730
+
731
+ # Note: Idle employee tracking removed - only counting employees actually working
732
+
733
+ # Pretty print
734
+ print("Objective (min cost):", result['objective'])
735
+ print("\n--- Weekly production by product ---")
736
+ for p, u in prod_week.items():
737
+ print(f"{p}: {u:.1f} / demand {DEMAND_DICTIONARY.get(p,0)}")
738
+
739
+ print("\n--- Schedule (line, shift, day) ---")
740
+ for row in schedule:
741
+ shift_name = ShiftType.get_name(row['shift'])
742
+ line_name = LineType.get_name(row['line_type_id'])
743
+ print(f"date_span_list{row['day']} {line_name}-{row['line_idx']} {shift_name}: "
744
+ f"{row['product']} Hours={row['run_hours']:.2f}h Units={row['units']:.1f}")
745
+
746
+ print("\n--- Implied headcount need (per type/shift/day) ---")
747
+ for row in headcount:
748
+ shift_name = ShiftType.get_name(row['shift'])
749
+ print(f"{row['emp_type']}, {shift_name}, date_span_list{row['day']}: "
750
+ f"need={row['needed']} (avail {row['available']})")
751
+
752
+ print("\n--- Total person-hours by type/day ---")
753
+ for row in ph_by_day:
754
+ print(f"{row['emp_type']}, date_span_list{row['day']}: used={row['used_person_hours']:.1f} "
755
+ f"(cap {row['cap_person_hours']})")
756
+
757
+ print("\n--- Actual employee count by type/shift/day ---")
758
+ for row in employee_count_by_shift:
759
+ shift_name = ShiftType.get_name(row['shift'])
760
+ print(f"{row['emp_type']}, {shift_name}, date_span_list{row['day']}: "
761
+ f"count={row['employee_count']} employees, "
762
+ f"total_hours={row['total_person_hours']:.1f}h, "
763
+ f"avg={row['avg_hours_per_employee']:.1f}h/employee")
764
+
765
+ print("\n--- Daily employee totals by type/day (sum across shifts) ---")
766
+ for row in employee_count_by_day:
767
+ print(f"{row['emp_type']}, date_span_list{row['day']}: "
768
+ f"count={row['employee_count']} employees total, "
769
+ f"total_hours={row['total_person_hours']:.1f}h, "
770
+ f"avg={row['avg_hours_per_employee']:.1f}h/employee "
771
+ f"(available: {row['available']})")
772
+
773
+ # Note: Idle employee reporting removed - only tracking employees actually working
774
+
775
+ return result
776
+
777
+
778
+ if __name__ == "__main__":
779
+ optimizer = Optimizer()
780
+ optimizer.run_optimization()
src/preprocess/__init__.py ADDED
File without changes
src/preprocess/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (163 Bytes). View file
 
src/preprocess/__pycache__/extract.cpython-310.pyc ADDED
Binary file (5.59 kB). View file
 
src/preprocess/__pycache__/hierarchy_parser.cpython-310.pyc ADDED
Binary file (6.49 kB). View file
 
src/preprocess/__pycache__/transform.cpython-310.pyc ADDED
Binary file (2.46 kB). View file
 
src/preprocess/data_preprocess.py ADDED
@@ -0,0 +1,39 @@
1
+ import pandas as pd
2
+
3
+
4
+ def process_Kit_Composition_and_relation(output_csv_path: str = 'data/real_data_excel/converted_csv/Kit_Composition_and_relation_cleaned_with_line_type_and_id.csv') -> pd.DataFrame:
5
+ """
6
+ Process the Kit_Composition_and_relation.csv file to clean the data and add line type and id.
7
+
8
+ Returns:
9
+ saves to csv path
10
+ cleaned_df: pd.DataFrame
11
+ """
12
+ df = pd.read_csv('data/real_data_excel/converted_csv/Kit_Composition_and_relation.csv')
13
+ # df.dropna(inplace=True)
14
+ master = df[["Master Kit", "Master Kit Description"]]
15
+ master["kit_type"] = "master"
16
+ master.rename(columns={"Master Kit": "kit_name", "Master Kit Description": "kit_description"}, inplace=True)
17
+
18
+ subkit = df[["Sub kit", "Sub kit description"]]
19
+ subkit["kit_type"] = "subkit"
20
+ subkit.rename(columns={"Sub kit": "kit_name", "Sub kit Description": "kit_description"}, inplace=True)
21
+ subkit.columns = ["kit_name", "kit_description", "kit_type"]
22
+
23
+ prepack = df[["Prepack", "Prepack Description"]]
24
+ prepack["kit_type"] = "prepack"
25
+ prepack.rename(columns={"Prepack": "kit_name", "Prepack Description": "kit_description"}, inplace=True)
26
+
27
+
28
+ cleaned_df = pd.concat([master, subkit, prepack])
29
+ cleaned_df = cleaned_df[['kit_name','kit_description','kit_type']].drop_duplicates()
30
+ tmp = cleaned_df.groupby('kit_name').count()['kit_type'].reset_index()
31
+ standalone_masterkit_list = tmp.loc[tmp['kit_type']==1,'kit_name']
32
+
33
+ cleaned_df.loc[cleaned_df['kit_name'].isin(standalone_masterkit_list),'line_type'] = 'long line'
34
+ cleaned_df.loc[cleaned_df['kit_type']=='prepack','line_type'] = 'mini load'
35
+ cleaned_df.loc[cleaned_df['kit_type']=='subkit','line_type'] = 'long line'
36
+ cleaned_df.loc[cleaned_df['line_type']=='mini load', 'line_id'] = 7
37
+ cleaned_df.loc[cleaned_df['line_type']=='long line', 'line_id'] = 6
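+ # line_id follows the numeric convention used by the optimizer: 6 = long line, 7 = mini load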
38
+ cleaned_df.to_csv(output_csv_path, index=False)
39
+ return cleaned_df
src/preprocess/excel_to_csv_converter.py ADDED
@@ -0,0 +1,111 @@
1
+ import pandas as pd
2
+ import os
3
+ from pathlib import Path
4
+
5
+ class ExcelToCsvConverter:
6
+ """
7
+ Convert an Excel file to CSV files.
8
+ """
9
+
10
+ def __init__(self, excel_path, output_dir=None):
11
+ self.excel_path = excel_path
12
+ self.output_dir = output_dir
13
+
14
+
15
+ def convert_excel_to_csv(excel_path, output_dir=None):
16
+ """
17
+ Convert each sheet of an Excel file to a separate CSV file.
18
+
19
+ Args:
20
+ excel_path (str): Path to the Excel file
21
+ output_dir (str): Output directory for CSV files. If None, uses same directory as Excel file
22
+ """
23
+ try:
24
+ # Set up output directory
25
+ if output_dir is None:
26
+ output_dir = os.path.dirname(excel_path)
27
+
28
+ # Create output directory if it doesn't exist
29
+ Path(output_dir).mkdir(parents=True, exist_ok=True)
30
+
31
+ # Read Excel file
32
+ excel_file = pd.ExcelFile(excel_path)
33
+ converted_files = []
34
+
35
+ for i, sheet_name in enumerate(excel_file.sheet_names, 1):
36
+ # Read the sheet
37
+ df = pd.read_excel(excel_path, sheet_name=sheet_name)
38
+
39
+ # Create a safe filename for the CSV
40
+ safe_filename = "".join(c for c in sheet_name if c.isalnum() or c in (' ', '-', '_')).rstrip()
41
+ # Use the sheet name to build the CSV file name below
44
+ safe_filename = safe_filename.replace(' ', '_')
45
+ csv_filename = f"{safe_filename}.csv"
46
+ csv_path = os.path.join(output_dir, csv_filename)
47
+
48
+ # Save as CSV
49
+ df.to_csv(csv_path, index=False, encoding='utf-8')
50
+ converted_files.append(csv_path)
51
+
52
+ print(f"✅ {i}. '{sheet_name}' → {csv_filename}")
53
+ print(f" - Saved {len(df)} rows, {len(df.columns)} columns")
54
+
55
+ print(f"\n🎉 Successfully converted {len(converted_files)} sheets to CSV files!")
56
+ return converted_files
57
+
58
+ except Exception as e:
59
+ print(f"❌ Error converting Excel to CSV: {e}")
60
+ return None
61
+
62
+
63
+ def convert_specific_sheet_to_csv(excel_path, sheet_name, output_dir=None):
64
+ """
65
+ Convert a specific sheet of an Excel file to a CSV file.
66
+ """
67
+ if output_dir is None:
68
+ output_dir = os.path.dirname(excel_path)
69
+
70
+ df = pd.read_excel(excel_path, sheet_name=sheet_name)
71
+ safe_filename = "".join(c for c in sheet_name if c.isalnum() or c in (' ', '-', '_')).rstrip()
72
+ safe_filename = safe_filename.replace(' ', '_')
73
+ csv_filename = f"{safe_filename}.csv"
74
+ csv_path = os.path.join(output_dir, csv_filename)
75
+ df.to_csv(csv_path, index=False, encoding='utf-8')
76
+ print(f"✅ {sheet_name} → {csv_filename}")
77
+
78
+ return csv_path
79
+
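+
+ # main() below calls analyze_excel_structure(), which is not defined in this module.
+ # The following is a minimal sketch (assumption: it only needs to report sheet names
+ # and sizes, returning None on failure) so that main() can run end to end.
+ def analyze_excel_structure(excel_path):
+ """Report the sheets of an Excel workbook with their row/column counts."""
+ try:
+ excel_file = pd.ExcelFile(excel_path)
+ sheet_info = {}
+ for sheet_name in excel_file.sheet_names:
+ df = pd.read_excel(excel_path, sheet_name=sheet_name)
+ sheet_info[sheet_name] = {"rows": len(df), "columns": len(df.columns)}
+ print(f"📄 '{sheet_name}': {len(df)} rows, {len(df.columns)} columns")
+ return sheet_info
+ except Exception as e:
+ print(f"❌ Error analyzing Excel structure: {e}")
+ return None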
80
+ def main():
81
+ """Main function to analyze and convert Excel file"""
82
+
83
+ # Define paths
84
+ excel_path = "data/real_data_excel/AI Project document.xlsx"
85
+ output_dir = "data/real_data_excel/converted_csv"
86
+
87
+ # Check if Excel file exists
88
+ if not os.path.exists(excel_path):
89
+ print(f"❌ Excel file not found: {excel_path}")
90
+ return
91
+
92
+ print("=" * 60)
93
+ print("📊 EXCEL TO CSV CONVERTER")
94
+ print("=" * 60)
95
+
96
+ # Step 1: Analyze Excel structure
97
+ sheet_info = analyze_excel_structure(excel_path)
98
+
99
+ if sheet_info is None:
100
+ return
101
+
102
+ # Step 2: Convert to CSV
103
+ converted_files = convert_excel_to_csv(excel_path, output_dir)
104
+
105
+ if converted_files:
106
+ print("\n📂 Converted files:")
107
+ for file_path in converted_files:
108
+ print(f" - {file_path}")
109
+
110
+ if __name__ == "__main__":
111
+ main()
src/preprocess/extract.py ADDED
@@ -0,0 +1,194 @@
1
+ import pandas as pd
2
+ import datetime
3
+ from datetime import date, timedelta
4
+ import json
5
+ import os
6
+ import yaml
7
+ from pathlib import Path
8
+
9
+ # Load paths configuration
10
+ _config_dir = Path(__file__).parent.parent / "config"
11
+ _paths_file = _config_dir / "paths.yaml"
12
+ with open(_paths_file, 'r', encoding='utf-8') as f:
13
+ PATHS = yaml.safe_load(f)
14
+
15
+
16
+ def read_kit_line_match_data() -> pd.DataFrame:
17
+ """Read kit composition and relation data"""
18
+ path = PATHS['data']['csv']['kit_composition']
19
+ return pd.read_csv(path)
20
+
21
+
22
+ def read_employee_data() -> pd.DataFrame:
23
+ """Read employee workforce hourly pay scale data"""
24
+ path = PATHS['data']['csv']['workforce_pay_scale']
25
+ return pd.read_csv(path)
26
+
27
+ def get_shift_info() -> pd.DataFrame:
28
+ """Read work shift information"""
29
+ path = PATHS['data']['csv']['work_shift']
30
+ df = pd.read_csv(path)
31
+ return df
32
+
33
+
34
+ def read_shift_cost_data() -> pd.DataFrame:
35
+ """Read shift cost data from workforce pay scale"""
36
+ path = PATHS['data']['csv']['workforce_pay_scale']
37
+ return pd.read_csv(path)
38
+
39
+
40
+ def read_work_center_capacity() -> pd.DataFrame:
41
+ """Read work center capacity data"""
42
+ path = PATHS['data']['csv']['work_center_capacity']
43
+ return pd.read_csv(path)
44
+
45
+
46
+ def read_material_master() -> pd.DataFrame:
47
+ """Read material master WMS data"""
48
+ path = PATHS['data']['csv']['material_master']
49
+ return pd.read_csv(path)
50
+
51
+ def read_packaging_line_data() -> pd.DataFrame:
52
+ """Read packaging line data (filtered work center capacity)"""
53
+ path = PATHS['data']['csv']['work_center_capacity_processed']
54
+ df = pd.read_csv(path)
55
+ # Filter for packaging lines only
56
+ df = df[df["line_for_packaging"] == True]
57
+ return df
58
+
59
+
60
+ def read_orders_data(
61
+ start_date=None,
62
+ # end_date=None,
63
+ ) -> pd.DataFrame:
64
+ """
65
+ Read COOIS Released Production Orders data
66
+
67
+ Args:
68
+ start_date: start date (pd.Timestamp or datetime)
69
+
70
+ Returns:
71
+ pd.DataFrame: filtered dataframe by date
72
+ """
73
+ path = PATHS['data']['csv']['demand']
74
+ df = pd.read_csv(path)
75
+ assert len(df) > 0, "No data found in the file"
76
+ # convert date column to datetime
77
+ df["Basic start date"] = pd.to_datetime(df["Basic start date"])
78
+
79
+
80
+ # filter by date
81
+ if start_date is not None: # Filter for exact start date only
82
+ df = df[df["Basic start date"] == pd.to_datetime(start_date)]
83
+ else:
84
+ raise ValueError("start_date is required")
85
+
86
+ return df
87
+
88
+
89
+ def read_package_speed_data():
90
+ """Read package speed data from Kits Calculation"""
91
+ path = PATHS['data']['csv']['kits_calculation']
92
+ df = pd.read_csv(path, usecols=["Kit", "Kit per day","Paid work hours per day"])
93
+ df["Kit per day"] = df["Kit per day"].astype(float)
94
+ df["Paid work hours per day"] = df["Paid work hours per day"].astype(float)
95
+ df["Kit"] = df["Kit"].astype(str)
96
+ df['kits_per_hour'] = df['Kit per day']/df['Paid work hours per day']
97
+ speeds_per_hour = dict(zip(df["Kit"], df["kits_per_hour"]))
98
+ return speeds_per_hour
99
+
100
+ def read_personnel_requirement_data():
101
+ """Read personnel requirement data from Kits Calculation"""
102
+ path = PATHS['data']['csv']['kits_calculation']
103
+ df = pd.read_csv(path, usecols=["Kit", "Humanizer", "UNICEF staff"])
104
+
105
+ # Clean the data by handling special whitespace characters like \xa0 (non-breaking space)
106
+ def clean_and_convert_to_float(value):
107
+ if pd.isna(value):
108
+ return 0.0
109
+
110
+ # Convert to string and strip all kinds of whitespace (including \xa0)
111
+ clean_value = str(value).strip()
112
+
113
+ # If empty after stripping, return 0
114
+ if clean_value == '' or clean_value == 'nan':
115
+ return 0.0
116
+
117
+ try:
118
+ return float(clean_value)
119
+ except ValueError as e:
120
+ print(f"Warning: Could not convert '{repr(value)}' to float, setting to 0. Error: {e}")
121
+ return 0.0
122
+
123
+ df["Humanizer"] = df["Humanizer"].apply(clean_and_convert_to_float)
124
+ df["UNICEF staff"] = df["UNICEF staff"].apply(clean_and_convert_to_float)
125
+ df["Kit"] = df["Kit"].astype(str)
126
+
127
+ return df
128
+
129
+
130
+ def get_production_order_data():
131
+ """
132
+ Extract production order information from hierarchy.
133
+
134
+ Returns:
135
+ tuple: (kit_levels, dependencies, priority_order)
136
+ - kit_levels: {kit_id: level} where level 0=prepack, 1=subkit, 2=master
137
+ - dependencies: {kit_id: [dependency_list]}
138
+ - priority_order: [kit_ids] sorted by production priority
139
+ """
140
+ path = PATHS['data']['hierarchy']['kit_hierarchy']
141
+ with open(path, 'r', encoding='utf-8') as f:
142
+ hierarchy = json.load(f)
143
+
144
+ kit_levels = {}
145
+ dependencies = {}
146
+
147
+ # Process hierarchy to extract levels and dependencies
148
+ for master_id, master_data in hierarchy.items():
149
+ # Master kits are level 2
150
+ kit_levels[master_id] = 2
151
+ dependencies[master_id] = master_data.get('dependencies', [])
152
+
153
+ # Process subkits (level 1)
154
+ for subkit_id, subkit_data in master_data.get('subkits', {}).items():
155
+ kit_levels[subkit_id] = 1
156
+ dependencies[subkit_id] = subkit_data.get('dependencies', [])
157
+
158
+ # Process prepacks under subkits (level 0)
159
+ for prepack_id in subkit_data.get('prepacks', []):
160
+ if prepack_id not in kit_levels: # Avoid overwriting if already exists
161
+ kit_levels[prepack_id] = 0
162
+ dependencies[prepack_id] = []
163
+
164
+ # Process direct prepacks under master (level 0)
165
+ for prepack_id in master_data.get('direct_prepacks', []):
166
+ if prepack_id not in kit_levels: # Avoid overwriting if already exists
167
+ kit_levels[prepack_id] = 0
168
+ dependencies[prepack_id] = []
169
+
170
+ # Create priority order: prepacks first, then subkits, then masters
171
+ priority_order = []
172
+
173
+ # Level 0: Prepacks (highest priority)
174
+ prepacks = [kit for kit, level in kit_levels.items() if level == 0]
175
+ priority_order.extend(sorted(prepacks))
176
+
177
+ # Level 1: Subkits (medium priority)
178
+ subkits = [kit for kit, level in kit_levels.items() if level == 1]
179
+ priority_order.extend(sorted(subkits))
180
+
181
+ # Level 2: Masters (lowest priority)
182
+ masters = [kit for kit, level in kit_levels.items() if level == 2]
183
+ priority_order.extend(sorted(masters))
184
+
185
+ return kit_levels, dependencies, priority_order
186
+
187
+
188
+
189
+ if __name__ == "__main__":
190
+ employee_data = read_employee_data()
191
+ print("employee data")
192
+ print(employee_data)
193
+ print("line speed data",read_package_speed_data())
194
+
src/preprocess/hierarchy_parser.py ADDED
@@ -0,0 +1,219 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Kit Hierarchy Parser - Converts CSV hierarchy data to optimized formats
4
+
5
+ This module provides functions to:
6
+ 1. Parse Kit_Composition_and_relation.csv
7
+ 2. Generate JSON hierarchy structure
8
+ 3. Create production order CSV
9
+ 4. Build DAG for optimization constraints
10
+ """
11
+
12
+ import pandas as pd
13
+ import json
14
+ from typing import Dict, List, Tuple, Set
15
+ from collections import defaultdict, deque
16
+
17
+
18
+ class KitHierarchyParser:
19
+ """
20
+ Parses kit composition data and creates hierarchy structures
21
+ for production order optimization.
22
+ """
23
+
24
+ def __init__(self, csv_path: str = "data/real_data_excel/converted_csv/Kit_Composition_and_relation.csv"):
25
+ self.csv_path = csv_path
26
+ self.df = None
27
+ self.hierarchy_json = {}
28
+ self.production_order_csv = []
29
+ self.dependency_graph = {'nodes': set(), 'edges': set()}
30
+
31
+ def load_data(self):
32
+ """Load and clean the CSV data"""
33
+ self.df = pd.read_csv(self.csv_path)
34
+ print(f"Loaded {len(self.df)} rows from {self.csv_path}")
35
+
36
+ def parse_hierarchy(self) -> Dict:
37
+ """
38
+ Parse the hierarchy from CSV into JSON structure
39
+ Returns: Nested dictionary representing the hierarchy
40
+ """
41
+ if self.df is None:
42
+ self.load_data()
43
+
44
+ # Get unique relationships
45
+ relationships = self.df[['Master Kit', 'Master Kit Description',
46
+ 'Sub kit', 'Sub kit description',
47
+ 'Prepack', 'Prepack Description']].drop_duplicates()
48
+
49
+ hierarchy = defaultdict(lambda: {
50
+ 'name': '',
51
+ 'type': 'master',
52
+ 'subkits': defaultdict(lambda: {
53
+ 'name': '',
54
+ 'type': 'subkit',
55
+ 'prepacks': [],
56
+ 'dependencies': []
57
+ }),
58
+ 'dependencies': []
59
+ })
60
+
61
+ for _, row in relationships.iterrows():
62
+ master_id = row['Master Kit']
63
+ master_desc = row['Master Kit Description']
64
+ subkit_id = row['Sub kit']
65
+ subkit_desc = row['Sub kit description']
66
+ prepack_id = row['Prepack']
67
+ prepack_desc = row['Prepack Description']
68
+
69
+ if pd.notna(master_id):
70
+ # Set master info
71
+ hierarchy[master_id]['name'] = master_desc if pd.notna(master_desc) else ''
72
+
73
+ if pd.notna(subkit_id):
74
+ # Set subkit info
75
+ hierarchy[master_id]['subkits'][subkit_id]['name'] = subkit_desc if pd.notna(subkit_desc) else ''
76
+
77
+ # Add subkit to master dependencies
78
+ if subkit_id not in hierarchy[master_id]['dependencies']:
79
+ hierarchy[master_id]['dependencies'].append(subkit_id)
80
+
81
+ if pd.notna(prepack_id):
82
+ # Set prepack info
83
+ if prepack_id not in hierarchy[master_id]['subkits'][subkit_id]['prepacks']:
84
+ hierarchy[master_id]['subkits'][subkit_id]['prepacks'].append(prepack_id)
85
+
86
+ # Add prepack to subkit dependencies
87
+ if prepack_id not in hierarchy[master_id]['subkits'][subkit_id]['dependencies']:
88
+ hierarchy[master_id]['subkits'][subkit_id]['dependencies'].append(prepack_id)
89
+
90
+ elif pd.notna(prepack_id):
91
+ # Handle direct master-prepack relationship (no subkit)
92
+ # Add direct_prepacks list to hierarchy if it doesn't exist
93
+ if 'direct_prepacks' not in hierarchy[master_id]:
94
+ hierarchy[master_id]['direct_prepacks'] = []
95
+
96
+ # Add prepack directly to master
97
+ if prepack_id not in hierarchy[master_id]['direct_prepacks']:
98
+ hierarchy[master_id]['direct_prepacks'].append(prepack_id)
99
+
100
+ # Add prepack to master dependencies
101
+ if prepack_id not in hierarchy[master_id]['dependencies']:
102
+ hierarchy[master_id]['dependencies'].append(prepack_id)
103
+
104
+ # Convert defaultdict to regular dict for JSON serialization
105
+ self.hierarchy_json = json.loads(json.dumps(hierarchy, default=dict))
106
+ return self.hierarchy_json
107
+
108
+
109
+
110
+ def sort_products_by_hierarchy(product_list: List[str],
111
+ kit_levels: Dict[str, int],
112
+ kit_dependencies: Dict[str, List[str]]) -> List[str]:
113
+ """
114
+ Sort products by hierarchy levels and dependencies using topological sorting.
115
+ Returns products in optimal production order: prepacks → subkits → masters
116
+ Dependencies within the same level are properly ordered.
117
+
118
+ Args:
119
+ product_list: List of product names to sort
120
+ kit_levels: Dictionary mapping product names to hierarchy levels (0=prepack, 1=subkit, 2=master)
121
+ kit_dependencies: Dictionary mapping product names to their dependencies (products that must be made first)
122
+
123
+ Returns:
124
+ List of products sorted in production order (dependencies first)
125
+ """
126
+ # Filter products that are in our production list and have hierarchy data
127
+ products_with_hierarchy = [p for p in product_list if p in kit_levels]
128
+ products_without_hierarchy = [p for p in product_list if p not in kit_levels]
129
+
130
+ if products_without_hierarchy:
131
+ print(f"[HIERARCHY] Products without hierarchy data: {products_without_hierarchy}")
132
+
133
+ # Build dependency graph for products in our list
134
+ graph = defaultdict(list) # product -> [dependents]
135
+ in_degree = defaultdict(int) # product -> number of dependencies
136
+
137
+ # Initialize all products
138
+ for product in products_with_hierarchy:
139
+ in_degree[product] = 0
140
+
141
+ for product in products_with_hierarchy:
142
+ deps = kit_dependencies.get(product, [])  # dependencies = products that have to be packed first
143
+ for dep in deps:
144
+ if dep in products_with_hierarchy: # Only if dependency is in our production list
145
+ # REVERSE THE RELATIONSHIP:
146
+ # kit_dependencies says: "product needs dep"
147
+ # graph says: "dep is needed by product"
148
+ graph[dep].append(product) # dep -> product (reverse the relationship!)
149
+ in_degree[product] += 1
150
+
151
+ # Topological sort with hierarchy level priority
152
+ sorted_products = []
153
+ # deque allows O(1) removal from both ends
154
+ queue = deque()
155
+
156
+ # Start with products that have no dependencies
157
+ for product in products_with_hierarchy:
158
+ if in_degree[product] == 0:
159
+ queue.append(product)
160
+
161
+ while queue:
162
+ current = queue.popleft()
163
+ sorted_products.append(current)
164
+
165
+ # Process dependents - sort by hierarchy level first
166
+ for dependent in sorted(graph[current], key=lambda p: (kit_levels.get(p, 999), p)):
167
+ in_degree[dependent] -= 1  # one fewer unmet dependency
168
+ if in_degree[dependent] == 0:  # all of its dependencies are scheduled, so it is ready to process
169
+ queue.append(dependent)
170
+
171
+ # Check for cycles (shouldn't happen with proper hierarchy)
172
+ if len(sorted_products) != len(products_with_hierarchy):
173
+ remaining = [p for p in products_with_hierarchy if p not in sorted_products]
174
+ print(f"[HIERARCHY] WARNING: Potential circular dependencies detected in: {remaining}")
175
+ # Add remaining products sorted by level as fallback
176
+ remaining_sorted = sorted(remaining, key=lambda p: (kit_levels.get(p, 999), p))
177
+ sorted_products.extend(remaining_sorted)
178
+
179
+ # Add products without hierarchy information at the end
180
+ sorted_products.extend(sorted(products_without_hierarchy))
181
+
182
+ print(f"[HIERARCHY] Dependency-aware production order: {len(sorted_products)} products")
183
+ for i, p in enumerate(sorted_products[:10]): # Show first 10
184
+ level = kit_levels.get(p, "unknown")
185
+ # Import here to avoid circular dependency
186
+ try:
187
+ from src.config.constants import KitLevel
188
+ level_name = KitLevel.get_name(level)
189
+ except ImportError:
190
+ level_name = f"level_{level}"
191
+ deps = kit_dependencies.get(p, [])
192
+ deps_in_list = [d for d in deps if d in products_with_hierarchy]
193
+ print(f" {i+1}. {p} (level {level}={level_name}, deps: {len(deps_in_list)})")
194
+ if deps_in_list:
195
+ print(f" Dependencies: {deps_in_list}")
196
+
197
+ if len(sorted_products) > 10:
198
+ print(f" ... and {len(sorted_products) - 10} more products")
199
+
200
+ return sorted_products
201
+
202
+
203
+ def main():
204
+ """Demo the hierarchy parser"""
205
+ parser = KitHierarchyParser()
206
+
207
+ print("🔄 Parsing kit hierarchy...")
208
+ hierarchy = parser.parse_hierarchy()
209
+
210
+ # Export hierarchy to JSON
211
+ with open('data/hierarchy_exports/kit_hierarchy.json', 'w') as f:
212
+ json.dump(hierarchy, f, indent=4)
213
+
214
+ print(f"📊 Found {len(hierarchy)} master kits")
215
+
216
+
217
+
218
+ if __name__ == "__main__":
219
+ main()
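
A minimal usage sketch of the parser and the dependency-aware sort defined above. The import path and the example kit names, levels, and dependency map are assumptions for illustration only; in the real pipeline the levels and dependencies come from the optimization config and the exported kit_hierarchy.json.

    from src.preprocess.kit_hierarchy_parser import (  # module path assumed
        KitHierarchyParser,
        sort_products_by_hierarchy,
    )

    # Parse the raw composition CSV into the nested hierarchy structure
    parser = KitHierarchyParser("data/real_data_excel/converted_csv/Kit_Composition_and_relation.csv")
    hierarchy = parser.parse_hierarchy()

    # Order an illustrative product list so that dependencies are packed first
    kit_levels = {"PREPACK_C": 0, "SUBKIT_B": 1, "MASTER_A": 2}  # 0=prepack, 1=subkit, 2=master
    kit_dependencies = {"SUBKIT_B": ["PREPACK_C"], "MASTER_A": ["SUBKIT_B"]}
    order = sort_products_by_hierarchy(["MASTER_A", "SUBKIT_B", "PREPACK_C"], kit_levels, kit_dependencies)
    # -> ['PREPACK_C', 'SUBKIT_B', 'MASTER_A']
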
src/preprocess/kit_composition_cleaner.py ADDED
@@ -0,0 +1,259 @@
1
+ """
2
+ Kit Composition Data Cleaner
3
+
4
+ This script converts the Kit_Composition_and_relation.csv file into a cleaned format
5
+ with line types according to the following rules:
6
+
7
+ 1. Master Kits:
8
+ - If it has no subkits or prepacks (standalone master): line_type = "long line"
9
+ - If it has subkits or prepacks: line_type = "" (empty/theoretical, no direct production)
10
+
11
+ 2. Sub Kits:
12
+ - All sub kits get line_type = "long line"
13
+
14
+ 3. Prepacks:
15
+ - All prepacks get line_type = "miniload"
16
+
17
+ The output includes columns: kit_name, kit_description, kit_type, line_type
18
+ """
19
+
20
+ import pandas as pd
21
+ import os
22
+ from typing import Tuple
23
+
24
+
25
+ class KitCompositionCleaner:
26
+ """
27
+ Cleans and processes kit composition data with line type assignments.
28
+
29
+ This class maintains state across processing steps, allowing for:
30
+ - Single data load
31
+ - Step-by-step processing
32
+ - Intermediate result storage
33
+ """
34
+
35
+ def __init__(self, input_file: str, output_file: str = None):
36
+ """
37
+ Initialize the cleaner with file paths.
38
+
39
+ Args:
40
+ input_file: Path to input CSV file (Kit_Composition_and_relation.csv)
41
+ output_file: Path to output CSV file (optional, can be set later)
42
+ """
43
+ self.input_file = input_file
44
+ self.output_file = output_file
45
+
46
+ # State variables for processing pipeline
47
+ self.df = None
48
+ self.master_df = None
49
+ self.subkit_df = None
50
+ self.prepack_df = None
51
+ self.final_df = None
52
+
53
+ def load_data(self) -> pd.DataFrame:
54
+ """Load the Kit Composition and relation CSV file."""
55
+ if not os.path.exists(self.input_file):
56
+ raise FileNotFoundError(f"File not found: {self.input_file}")
57
+
58
+ self.df = pd.read_csv(self.input_file)
59
+ print(f"Loaded {len(self.df)} rows from {self.input_file}")
60
+ return self.df
61
+
62
+ def process_master_kits(self) -> pd.DataFrame:
63
+ """
64
+ Process Master Kits according to business rules:
65
+ - Standalone masters (no subkits/prepacks, only components): line_type = "long line"
66
+ - Non-standalone masters (have subkits/prepacks): line_type = "" (empty - no production needed)
67
+ """
68
+ if self.df is None:
69
+ raise ValueError("Data not loaded. Call load_data() first.")
70
+
71
+ print("Processing Master Kits...")
72
+
73
+ # Identify masters with hierarchy (subkits or prepacks)
74
+ masters_with_subkits = set(self.df[self.df['Sub kit'].notna()]['Master Kit'].unique())
75
+ masters_with_prepacks = set(self.df[self.df['Prepack'].notna()]['Master Kit'].unique())
76
+ masters_with_hierarchy = masters_with_subkits.union(masters_with_prepacks)
77
+
78
+ # All masters
79
+ all_masters = set(self.df['Master Kit'].unique())
80
+
81
+ # Standalone masters are those WITHOUT subkits/prepacks (only have components)
82
+ standalone_masters = all_masters - masters_with_hierarchy
83
+
84
+ print(f"Total unique Master Kits: {len(all_masters)}")
85
+ print(f"Masters with subkits/prepacks: {len(masters_with_hierarchy)}")
86
+ print(f"Standalone masters (only components): {len(standalone_masters)}")
87
+
88
+ # Create master kit records
89
+ master_data = []
90
+
91
+ # Get unique master kits with descriptions
92
+ unique_masters = self.df[['Master Kit', 'Master Kit Description']].drop_duplicates()
93
+
94
+ for _, row in unique_masters.iterrows():
95
+ master_kit = row['Master Kit']
96
+ master_desc = row['Master Kit Description']
97
+
98
+ # Determine line_type based on standalone status
99
+ if master_kit in standalone_masters:
100
+ line_type = "long line"
101
+ else:
102
+ line_type = "" # Empty for non-standalone (theoretical)
103
+
104
+ master_data.append({
105
+ 'kit_name': master_kit,
106
+ 'kit_description': master_desc,
107
+ 'kit_type': 'master',
108
+ 'line_type': line_type
109
+ })
110
+
111
+ self.master_df = pd.DataFrame(master_data)
112
+
113
+
114
+ return self.master_df
115
+
116
+ def process_sub_kits(self) -> pd.DataFrame:
117
+ """
118
+ Process Sub Kits according to business rules:
119
+ - All sub kits get line_type = "long line"
120
+ - Remove duplicates
121
+ """
122
+ if self.df is None:
123
+ raise ValueError("Data not loaded. Call load_data() first.")
124
+
125
+ print("Processing Sub Kits...")
126
+
127
+ # Filter rows that have sub kits
128
+ subkit_df = self.df[self.df['Sub kit'].notna()].copy()
129
+
130
+ if len(subkit_df) == 0:
131
+ print("No sub kits found")
132
+ self.subkit_df = pd.DataFrame(columns=['kit_name', 'kit_description', 'kit_type', 'line_type'])
133
+ return self.subkit_df
134
+
135
+ # Get unique sub kits with descriptions
136
+ unique_subkits = subkit_df[['Sub kit', 'Sub kit description']].drop_duplicates()
137
+
138
+ subkit_data = []
139
+ for _, row in unique_subkits.iterrows():
140
+ subkit_data.append({
141
+ 'kit_name': row['Sub kit'],
142
+ 'kit_description': row['Sub kit description'],
143
+ 'kit_type': 'subkit',
144
+ 'line_type': 'long line'
145
+ })
146
+
147
+ self.subkit_df = pd.DataFrame(subkit_data)
148
+ print(f"Created {len(self.subkit_df)} sub kit records")
149
+
150
+ return self.subkit_df
151
+
152
+ def process_prepacks(self) -> pd.DataFrame:
153
+ """
154
+ Process Prepacks according to business rules:
155
+ - All prepacks get line_type = "miniload"
156
+ - Remove duplicates
157
+ """
158
+ if self.df is None:
159
+ raise ValueError("Data not loaded. Call load_data() first.")
160
+
161
+ print("Processing Prepacks...")
162
+
163
+ # Filter rows that have prepacks
164
+ prepack_df = self.df[self.df['Prepack'].notna()].copy()
165
+
166
+ if len(prepack_df) == 0:
167
+ print("No prepacks found")
168
+ self.prepack_df = pd.DataFrame(columns=['kit_name', 'kit_description', 'kit_type', 'line_type'])
169
+ return self.prepack_df
170
+
171
+ # Get unique prepacks with descriptions
172
+ unique_prepacks = prepack_df[['Prepack', 'Prepack Description']].drop_duplicates()
173
+
174
+ prepack_data = []
175
+ for _, row in unique_prepacks.iterrows():
176
+ prepack_data.append({
177
+ 'kit_name': row['Prepack'],
178
+ 'kit_description': row['Prepack Description'],
179
+ 'kit_type': 'prepack',
180
+ 'line_type': 'miniload'
181
+ })
182
+
183
+ self.prepack_df = pd.DataFrame(prepack_data)
184
+ print(f"Created {len(self.prepack_df)} prepack records")
185
+
186
+ return self.prepack_df
187
+
188
+ def concatenate_and_save(self, output_path: str = None) -> pd.DataFrame:
189
+ """
190
+ Concatenate all processed dataframes and save to output file.
191
+
192
+ Args:
193
+ output_path: Path to save the output file (uses self.output_file if not provided)
194
+ """
195
+ if self.master_df is None or self.subkit_df is None or self.prepack_df is None:
196
+ raise ValueError("Processing not complete. Run process_master_kits(), process_sub_kits(), and process_prepacks() first.")
197
+
198
+ print("Concatenating results...")
199
+
200
+ # Concatenate all dataframes
201
+ self.final_df = pd.concat([self.master_df, self.subkit_df, self.prepack_df], ignore_index=True)
202
+
203
+ # Ensure empty strings instead of NaN for line_type
204
+ self.final_df['line_type'] = self.final_df['line_type'].fillna('')
205
+
206
+ # Sort by kit_type for better organization
207
+ self.final_df = self.final_df.sort_values(['kit_type', 'kit_name']).reset_index(drop=True)
208
+
209
+ print(f"Final dataset contains {len(self.final_df)} records:")
210
+ print(f" - Masters: {len(self.master_df)}")
211
+ print(f" - Subkits: {len(self.subkit_df)}")
212
+ print(f" - Prepacks: {len(self.prepack_df)}")
213
+
214
+ # Determine output path
215
+ save_path = output_path or self.output_file
216
+ if save_path is None:
217
+ raise ValueError("No output path provided. Specify output_path parameter or set self.output_file")
218
+
219
+ # Save to file (keep empty strings as empty, not NaN)
220
+ self.final_df.to_csv(save_path, index=False, na_rep='')
221
+ print(f"Saved cleaned data to: {save_path}")
222
+
223
+ return self.final_df
224
+
225
+
226
+ def main():
227
+ """Main function to execute the kit composition cleaning process."""
228
+ # Define file paths
229
+ base_dir = os.getcwd()  # assumes the script is run from the project root (replaces a machine-specific absolute path)
230
+ input_file = os.path.join(base_dir, "data/real_data_excel/converted_csv/Kit_Composition_and_relation.csv")
231
+ output_file = os.path.join(base_dir, "data/real_data_excel/converted_csv/Kit_Composition_and_relation_cleaned_with_line_type.csv")
232
+
233
+ try:
234
+ # Initialize cleaner with class
235
+ cleaner = KitCompositionCleaner(input_file, output_file)
236
+
237
+ # Execute pipeline step by step
238
+ cleaner.load_data()
239
+ cleaner.process_master_kits()
240
+ cleaner.process_sub_kits()
241
+ cleaner.process_prepacks()
242
+ final_df = cleaner.concatenate_and_save()
243
+
244
+ # Display summary statistics
245
+ print("Line type distribution:")
246
+ print(final_df['line_type'].value_counts(dropna=False))
247
+ print("\nKit type distribution:")
248
+ print(final_df['kit_type'].value_counts())
249
+
250
+ print("\nSample of final data:")
251
+ print(final_df.head(10))
252
+
253
+ except Exception as e:
254
+ print(f"❌ Error processing kit composition data: {e}")
255
+ raise
256
+
257
+
258
+ if __name__ == "__main__":
259
+ main()
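
For reference, a sketch of driving the cleaner with project-relative paths rather than the absolute path hard-coded in main(); the file locations simply mirror the defaults used above.

    from src.preprocess.kit_composition_cleaner import KitCompositionCleaner

    cleaner = KitCompositionCleaner(
        input_file="data/real_data_excel/converted_csv/Kit_Composition_and_relation.csv",
        output_file="data/real_data_excel/converted_csv/Kit_Composition_and_relation_cleaned_with_line_type.csv",
    )
    cleaner.load_data()
    cleaner.process_master_kits()
    cleaner.process_sub_kits()
    cleaner.process_prepacks()
    cleaned = cleaner.concatenate_and_save()
    print(cleaned["line_type"].value_counts(dropna=False))
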
src/preprocess/transform.py ADDED
@@ -0,0 +1,79 @@
1
+ import pandas as pd
2
+ import src.preprocess.extract as ex
3
+
4
+
5
+ def get_product_list(start_date=None):
6
+ """
7
+ Get unique product list from demand data
8
+
9
+ Args:
10
+ start_date: start date to filter data. Required.
11
+ """
12
+ demand = ex.read_orders_data(start_date=start_date)
13
+ print(demand["Material Number"].unique())
14
+ return demand["Material Number"].unique()
15
+
16
+
17
+ def get_employee_list():
18
+ employee = ex.read_employee_data()
19
+ employee = employee["Description"]  # assumes read_employee_data() returns a mapping of table/sheet name -> DataFrame
20
+
21
+ return employee["Employee_Type"].unique()
22
+
23
+
24
+ def get_released_product_list(start_date=None):
25
+ """
26
+ get released product list from COOIS_Released_Prod_Orders.csv
27
+
28
+ Args:
29
+ start_date: start date to filter data. Required.
30
+ """
31
+ released_orders = ex.read_orders_data(
32
+ start_date=start_date,
33
+ )
34
+ product_list = released_orders["Material Number"].unique().tolist()
35
+ print(f"Released products for date range {start_date}: {len(product_list)} products")
36
+ return product_list
37
+
38
+
39
+ def get_available_dates():
40
+ """
41
+ Get all available dates from COOIS_Released_Prod_Orders.csv
42
+
43
+ Returns:
44
+ tuple: (start_dates, end_dates) - unique start dates and end dates list
45
+ """
46
+ released_orders = ex.read_orders_data()
47
+
48
+ released_orders["Basic start date"] = pd.to_datetime(released_orders["Basic start date"])
49
+ released_orders["Basic finish date"] = pd.to_datetime(released_orders["Basic finish date"])
50
+
51
+ start_dates = sorted(released_orders["Basic start date"].dt.date.unique())
52
+ end_dates = sorted(released_orders["Basic finish date"].dt.date.unique())
53
+
54
+ all_dates = sorted(set(start_dates + end_dates))
55
+
56
+ return all_dates, start_dates, end_dates
57
+
58
+
59
+ def get_date_ranges():
60
+ """
61
+ get available (start_date, end_date) combinations
62
+ Returns:
63
+ list : available (start_date, end_date) combinations
64
+ """
65
+ released_orders = ex.read_orders_data()
66
+
67
+ released_orders["Basic start date"] = pd.to_datetime(released_orders["Basic start date"])
68
+ released_orders["Basic finish date"] = pd.to_datetime(released_orders["Basic finish date"])
69
+
70
+ date_ranges = released_orders[["Basic start date", "Basic finish date"]].drop_duplicates()
71
+ date_ranges["start_date"] = date_ranges["Basic start date"].dt.date
72
+ date_ranges["end_date"] = date_ranges["Basic finish date"].dt.date
73
+
74
+ ranges = [(row["start_date"], row["end_date"]) for _, row in date_ranges.iterrows()]
75
+ ranges = sorted(set(ranges))
76
+
77
+ return ranges
78
+
79
+
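
A short sketch of how these helpers chain together to pick a date range and pull its released products; it assumes src.preprocess.extract.read_orders_data can locate the released-orders CSV without extra arguments.

    from src.preprocess import transform

    ranges = transform.get_date_ranges()            # unique (start_date, end_date) pairs
    if ranges:
        start_date, _end_date = ranges[0]
        products = transform.get_released_product_list(start_date=start_date)
        print(f"{len(products)} products to schedule from {start_date}")
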
src/visualization/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """
2
+ Visualization package for Supply Roster Optimization Tool
3
+ Provides visualization dashboards and charts for optimization results
4
+ """
5
+
src/visualization/hierarchy_dashboard.py ADDED
@@ -0,0 +1,554 @@
1
+ """
2
+ Hierarchy-Based Production Flow Visualization
3
+ Shows how kits flow through production based on dependency hierarchy
4
+ """
5
+
6
+ import streamlit as st
7
+ import pandas as pd
8
+ import plotly.express as px
9
+ import plotly.graph_objects as go
10
+ from plotly.subplots import make_subplots
11
+ try:
12
+ import networkx as nx
13
+ NETWORKX_AVAILABLE = True
14
+ except ImportError:
15
+ NETWORKX_AVAILABLE = False
16
+ nx = None
17
+
18
+ import numpy as np
19
+ import sys
20
+
21
+ from src.config.optimization_config import (
22
+ KIT_LEVELS, KIT_DEPENDENCIES, TEAM_REQ_PER_PRODUCT,
23
+ shift_code_to_name, line_code_to_name
24
+ )
25
+ from src.config.constants import ShiftType, LineType, KitLevel
26
+
27
+ # Import kit relationships dashboard
28
+ try:
29
+ from src.visualization.kit_relationships import display_kit_relationships_dashboard
30
+ except ImportError:
31
+ display_kit_relationships_dashboard = None
32
+
33
+ def display_hierarchy_operations_dashboard(results):
34
+ """Enhanced operations dashboard showing hierarchy-based production flow"""
35
+ st.header("🏭 Hierarchy-Based Operations Dashboard")
36
+ st.markdown("---")
37
+
38
+ # Create main dashboard tabs
39
+ tab1, tab2, tab3 = st.tabs([
40
+ "🔄 Production Flow",
41
+ "📊 Hierarchy Analytics",
42
+ "🔗 Kit Relationships"
43
+ ])
44
+
45
+ with tab1:
46
+ display_production_flow_visualization(results)
47
+
48
+ with tab2:
49
+ display_hierarchy_analytics(results)
50
+
51
+ with tab3:
52
+ # Kit relationships from actual hierarchy data
53
+ if display_kit_relationships_dashboard:
54
+ display_kit_relationships_dashboard(results)
55
+ else:
56
+ st.error("Kit relationships dashboard not available. Please check installation.")
57
+
58
+ def display_production_flow_visualization(results):
59
+ """Show how products flow through production lines by hierarchy"""
60
+ st.subheader("🔄 Kit Production Flow by Hierarchy")
61
+
62
+ # Get production sequence data
63
+ flow_data = prepare_hierarchy_flow_data(results)
64
+
65
+ if not flow_data:
66
+ st.warning("No production data available for flow visualization")
67
+ return
68
+
69
+ # Create flow diagram
70
+
71
+
72
+
73
+ # Hierarchy level summary - horizontal layout
74
+ st.subheader("📦 Production by Level")
75
+ level_summary = get_hierarchy_level_summary(flow_data)
76
+
77
+ # Create horizontal columns for each level
78
+ level_names = ['prepack', 'subkit', 'master']
79
+ available_levels = [level for level in level_names if level in level_summary]
80
+
81
+ if available_levels:
82
+ cols = st.columns(len(available_levels))
83
+
84
+ for i, level_name in enumerate(available_levels):
85
+ data = level_summary[level_name]
86
+ with cols[i]:
87
+ # Use custom styling instead of st.metric to avoid delta arrows
88
+ st.markdown(f"""
89
+ <div style="
90
+ background: linear-gradient(135deg, #f0f8ff, #e6f3ff);
91
+ padding: 1rem;
92
+ border-radius: 0.5rem;
93
+ text-align: center;
94
+ border-left: 4px solid {'#90EE90' if level_name == 'prepack' else '#FFD700' if level_name == 'subkit' else '#FF6347'};
95
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
96
+ ">
97
+ <div style="font-size: 0.8rem; color: #666; text-transform: uppercase; letter-spacing: 1px;">
98
+ {level_name.title()} Kits
99
+ </div>
100
+ <div style="font-size: 1.5rem; font-weight: bold; color: #333; margin: 0.2rem 0;">
101
+ {data['count']} products
102
+ </div>
103
+ <div style="font-size: 1rem; color: #555;">
104
+ {data['total_units']:,.0f} units
105
+ </div>
106
+ </div>
107
+ """, unsafe_allow_html=True)
108
+
109
+ # Timeline view of hierarchy production
110
+ st.subheader("📅 Hierarchy Production Timeline")
111
+ try:
112
+ fig_timeline = create_hierarchy_timeline(flow_data)
113
+ st.plotly_chart(fig_timeline, use_container_width=True)
114
+ except Exception as e:
115
+ st.warning(f"Timeline chart temporarily unavailable ({e}). Showing alternative visualization.")
116
+ # Fallback: Simple bar chart by day
117
+ if flow_data:
118
+ df_simple = pd.DataFrame([{
119
+ 'Day': f"Day {row['day']}",
120
+ 'Level': row['level_name'].title(),
121
+ 'Units': row['units'],
122
+ 'Product': row['product']
123
+ } for row in flow_data])
124
+
125
+ fig_simple = px.bar(df_simple, x='Day', y='Units', color='Level',
126
+ title='Production Volume by Day and Hierarchy Level',
127
+ color_discrete_map={
128
+ 'Prepack': '#90EE90',
129
+ 'Subkit': '#FFD700',
130
+ 'Master': '#FF6347'
131
+ })
132
+ st.plotly_chart(fig_simple, use_container_width=True)
133
+
134
+ def display_hierarchy_analytics(results):
135
+ """Deep dive analytics on hierarchy production performance"""
136
+ st.subheader("📊 Hierarchy Performance Analytics")
137
+
138
+ # Prepare analytics data
139
+ analytics_data = prepare_hierarchy_analytics_data(results)
140
+
141
+ if not analytics_data:
142
+ st.warning("No hierarchy data available for analytics")
143
+ return
144
+
145
+ # Key metrics
146
+ col1, col2, col3, col4 = st.columns(4)
147
+
148
+ with col1:
149
+ prepack_efficiency = analytics_data.get('prepack_efficiency', 0)
150
+ st.metric("Prepack Efficiency", f"{prepack_efficiency:.1f}%",
151
+ delta=f"{prepack_efficiency-95:.1f}%" if prepack_efficiency != 95 else None)
152
+
153
+ with col2:
154
+ dependency_violations = analytics_data.get('dependency_violations', 0)
155
+ st.metric("Dependency Violations", f"{dependency_violations}",
156
+ delta=f"-{dependency_violations}" if dependency_violations > 0 else None)
157
+
158
+ with col3:
159
+ avg_lead_time = analytics_data.get('avg_lead_time', 0)
160
+ st.metric("Avg Lead Time", f"{avg_lead_time:.1f} days")
161
+
162
+ with col4:
163
+ hierarchy_cost_efficiency = analytics_data.get('cost_efficiency', 0)
164
+ st.metric("Cost Efficiency", f"€{hierarchy_cost_efficiency:.2f}/unit")
165
+
166
+ # Dependency flow chart
167
+ st.subheader("🔗 Dependency Network Analysis")
168
+ fig_network = create_dependency_network_chart(analytics_data)
169
+ st.plotly_chart(fig_network, use_container_width=True)
170
+
171
+ # Production heatmap
172
+ st.subheader("🔥 Hierarchy Production Heatmap")
173
+ heatmap_fig = create_hierarchy_heatmap(results)
174
+ st.plotly_chart(heatmap_fig, use_container_width=True)
175
+
176
+
177
+
178
+ # Removed display_enhanced_line_utilization function - utilization concept removed
179
+
180
+ def display_production_sequence_analysis(results):
181
+ """Analyze production sequence and timing"""
182
+ st.subheader("🎯 Production Sequence Analysis")
183
+
184
+ # NOTE: `sequence_data` was referenced below without being defined; its source is assumed
+ # here to be a (hypothetical) 'sequence_analysis' dict attached to the optimization results.
+ sequence_data = results.get('sequence_analysis', {})
185
+ if not sequence_data:
186
+ st.warning("No sequence data available")
187
+ return
188
+
189
+ # Sequence adherence metrics
190
+ col1, col2, col3 = st.columns(3)
191
+
192
+ with col1:
193
+ sequence_score = sequence_data.get('sequence_adherence_score', 0)
194
+ st.metric("Sequence Adherence", f"{sequence_score:.1f}%",
195
+ help="How well production follows optimal hierarchy sequence")
196
+
197
+ with col2:
198
+ early_productions = sequence_data.get('early_productions', 0)
199
+ st.metric("Early Productions", f"{early_productions}",
200
+ help="Products produced before their dependencies")
201
+
202
+ with col3:
203
+ optimal_sequences = sequence_data.get('optimal_sequences', 0)
204
+ st.metric("Optimal Sequences", f"{optimal_sequences}%",
205
+ help="Percentage of products following optimal sequence")
206
+
207
+ # Sequence violation chart
208
+ if sequence_data.get('violations'):
209
+ st.subheader("⚠️ Sequence Violations")
210
+ violations_df = pd.DataFrame(sequence_data['violations'])
211
+
212
+ fig = px.scatter(violations_df,
213
+ x='production_day', y='dependency_day',
214
+ color='severity', size='impact',
215
+ hover_data=['product', 'dependency'],
216
+ title='Production vs Dependency Timing (Violations in Red)',
217
+ labels={'production_day': 'When Product Was Made',
218
+ 'dependency_day': 'When Dependency Was Made'})
219
+
220
+ # Add diagonal line (should be above this line)
221
+ max_day = max(violations_df['production_day'].max(), violations_df['dependency_day'].max())
222
+ fig.add_shape(type="line", x0=0, y0=0, x1=max_day, y1=max_day,
223
+ line=dict(dash="dash", color="gray"),
224
+ name="Ideal Sequence Line")
225
+
226
+ st.plotly_chart(fig, use_container_width=True)
227
+
228
+ # Sequence optimization suggestions
229
+ st.subheader("💡 Optimization Suggestions")
230
+ suggestions = generate_sequence_suggestions(sequence_data)
231
+ for suggestion in suggestions:
232
+ st.info(f"💡 {suggestion}")
233
+
234
+ # Helper Functions
235
+
236
+ def prepare_hierarchy_flow_data(results):
237
+ """Prepare data for hierarchy flow visualization"""
238
+ flow_data = []
239
+
240
+ for row in results['run_schedule']:
241
+ product = row['product']
242
+ level = KIT_LEVELS.get(product, KitLevel.MASTER)
243
+ level_name = KitLevel.get_name(level)
244
+
245
+ flow_data.append({
246
+ 'product': product,
247
+ 'level': level,
248
+ 'level_name': level_name,
249
+ 'day': row['day'],
250
+ 'shift': row['shift'],
251
+ 'line_type': row['line_type_id'],
252
+ 'line_idx': row['line_idx'],
253
+ 'hours': row['run_hours'],
254
+ 'units': row['units'],
255
+ 'dependencies': KIT_DEPENDENCIES.get(product, [])
256
+ })
257
+
258
+ return flow_data
259
+
260
+ def create_hierarchy_timeline(flow_data):
261
+ """Create timeline showing hierarchy production sequence"""
262
+ if not flow_data:
263
+ return go.Figure()
264
+
265
+ # Prepare timeline data with proper datetime conversion
266
+ timeline_data = []
267
+
268
+ from datetime import datetime, timedelta
269
+ base_date = datetime(2025, 1, 1) # Base date for timeline
270
+
271
+ for row in flow_data:
272
+ shift_name = ShiftType.get_name(row['shift'])
273
+ line_name = LineType.get_name(row['line_type'])
274
+
275
+ # Create start and end times for the production run
276
+ start_date = base_date + timedelta(days=row['day']-1)
277
+ end_date = start_date + timedelta(hours=row['hours'])
278
+
279
+ timeline_data.append({
280
+ 'Product': row['product'],
281
+ 'Level': row['level_name'].title(),
282
+ 'Start': start_date,
283
+ 'End': end_date,
284
+ 'Day': f"Day {row['day']}",
285
+ 'Shift': shift_name,
286
+ 'Line': f"{line_name} {row['line_idx']}",
287
+ 'Units': row['units'],
288
+ 'Hours': row['hours'],
289
+ 'Priority': row['level'] # For sorting
290
+ })
291
+
292
+ df = pd.DataFrame(timeline_data)
293
+
294
+ if df.empty:
295
+ return go.Figure()
296
+
297
+ # Create timeline chart with proper datetime columns
298
+ fig = px.timeline(df,
299
+ x_start='Start', x_end='End',
300
+ y='Line',
301
+ color='Level',
302
+ hover_data=['Product', 'Units', 'Hours', 'Shift', 'Day'],
303
+ title='Production Timeline by Hierarchy Level',
304
+ color_discrete_map={
305
+ 'Prepack': '#90EE90',
306
+ 'Subkit': '#FFD700',
307
+ 'Master': '#FF6347'
308
+ })
309
+
310
+ fig.update_layout(
311
+ height=500,
312
+ xaxis_title='Production Timeline',
313
+ yaxis_title='Production Line'
314
+ )
315
+
316
+ return fig
317
+
318
+ def prepare_hierarchy_analytics_data(results):
319
+ """Prepare analytics data for hierarchy performance"""
320
+ analytics = {
321
+ 'prepack_efficiency': 0,
322
+ 'dependency_violations': 0,
323
+ 'avg_lead_time': 0,
324
+ 'cost_efficiency': 0,
325
+ 'violations': [],
326
+ 'dependencies': KIT_DEPENDENCIES
327
+ }
328
+
329
+ # Calculate metrics
330
+ total_cost = results.get('objective', 0)
331
+ total_units = sum(results.get('weekly_production', {}).values())
332
+
333
+ if total_units > 0:
334
+ analytics['cost_efficiency'] = total_cost / total_units
335
+
336
+ # Analyze dependency violations
337
+ production_times = {}
338
+ for row in results['run_schedule']:
339
+ product = row['product']
340
+ day = row['day']
341
+ if product not in production_times or day < production_times[product]:
342
+ production_times[product] = day
343
+
344
+ violations = 0
345
+ violation_details = []
346
+
347
+ for product, prod_day in production_times.items():
348
+ dependencies = KIT_DEPENDENCIES.get(product, [])
349
+ for dep in dependencies:
350
+ if dep in production_times:
351
+ dep_day = production_times[dep]
352
+ if dep_day > prod_day: # Dependency produced after product
353
+ violations += 1
354
+ violation_details.append({
355
+ 'product': product,
356
+ 'dependency': dep,
357
+ 'production_day': prod_day,
358
+ 'dependency_day': dep_day,
359
+ 'severity': 'high' if dep_day - prod_day > 1 else 'medium',
360
+ 'impact': abs(dep_day - prod_day)
361
+ })
362
+
363
+ analytics['dependency_violations'] = violations
364
+ analytics['violations'] = violation_details
365
+
366
+ return analytics
367
+
368
+ # Removed calculate_hierarchy_line_utilization and create_utilization_gauge functions
369
+ # - utilization concept removed from dashboard
370
+
371
+ def create_hierarchy_heatmap(results):
372
+ """Create heatmap showing hierarchy production by line and day"""
373
+ # Prepare heatmap data
374
+ heatmap_data = []
375
+
376
+ for row in results['run_schedule']:
377
+ product = row['product']
378
+ level_name = KitLevel.get_name(KIT_LEVELS.get(product, KitLevel.MASTER))
379
+ line_name = f"{LineType.get_name(row['line_type_id'])} {row['line_idx']}"
380
+
381
+ heatmap_data.append({
382
+ 'Line': line_name,
383
+ 'Day': f"Day {row['day']}",
384
+ 'Level': level_name,
385
+ 'Units': row['units'],
386
+ 'Hours': row['run_hours']
387
+ })
388
+
389
+ if not heatmap_data:
390
+ return go.Figure()
391
+
392
+ df = pd.DataFrame(heatmap_data)
393
+
394
+ # Pivot for heatmap
395
+ pivot_df = df.pivot_table(
396
+ values='Units',
397
+ index='Line',
398
+ columns='Day',
399
+ aggfunc='sum',
400
+ fill_value=0
401
+ )
402
+
403
+ fig = px.imshow(pivot_df.values,
404
+ x=pivot_df.columns,
405
+ y=pivot_df.index,
406
+ color_continuous_scale='Blues',
407
+ title='Production Volume Heatmap (Units per Day)',
408
+ labels=dict(x="Day", y="Production Line", color="Units"))
409
+
410
+ return fig
411
+
412
+ def create_dependency_network_chart(analytics_data):
413
+ """Create network chart showing dependency relationships"""
414
+ dependencies = analytics_data.get('dependencies', {})
415
+
416
+ if not dependencies or not NETWORKX_AVAILABLE:
417
+ return go.Figure().add_annotation(
418
+ text="Dependency network visualization requires 'networkx' package. Install with: pip install networkx" if not NETWORKX_AVAILABLE else "No dependency relationships to display",
419
+ xref="paper", yref="paper",
420
+ x=0.5, y=0.5, showarrow=False
421
+ )
422
+
423
+ # Create network graph
424
+ G = nx.DiGraph()
425
+
426
+ # Add nodes and edges
427
+ for product, deps in dependencies.items():
428
+ if product and deps: # Only if product has dependencies
429
+ G.add_node(product)
430
+ for dep in deps:
431
+ if dep: # Only if dependency exists
432
+ G.add_node(dep)
433
+ G.add_edge(dep, product) # Dependency -> Product
434
+
435
+ if len(G.nodes()) == 0:
436
+ return go.Figure().add_annotation(
437
+ text="No dependency relationships to display",
438
+ xref="paper", yref="paper",
439
+ x=0.5, y=0.5, showarrow=False
440
+ )
441
+
442
+ # Calculate layout
443
+ pos = nx.spring_layout(G, k=3, iterations=50)
444
+
445
+ # Create edge traces
446
+ edge_x = []
447
+ edge_y = []
448
+ for edge in G.edges():
449
+ x0, y0 = pos[edge[0]]
450
+ x1, y1 = pos[edge[1]]
451
+ edge_x.extend([x0, x1, None])
452
+ edge_y.extend([y0, y1, None])
453
+
454
+ edge_trace = go.Scatter(x=edge_x, y=edge_y,
455
+ line=dict(width=0.5, color='#888'),
456
+ hoverinfo='none',
457
+ mode='lines')
458
+
459
+ # Create node traces
460
+ node_x = []
461
+ node_y = []
462
+ node_text = []
463
+ node_color = []
464
+
465
+ for node in G.nodes():
466
+ x, y = pos[node]
467
+ node_x.append(x)
468
+ node_y.append(y)
469
+ node_text.append(node)
470
+
471
+ # Color by hierarchy level
472
+ level = KIT_LEVELS.get(node, KitLevel.MASTER)
473
+ if level == KitLevel.PREPACK:
474
+ node_color.append('#90EE90')
475
+ elif level == KitLevel.SUBKIT:
476
+ node_color.append('#FFD700')
477
+ else:
478
+ node_color.append('#FF6347')
479
+
480
+ node_trace = go.Scatter(x=node_x, y=node_y,
481
+ mode='markers+text',
482
+ text=node_text,
483
+ textposition='middle center',
484
+ marker=dict(size=20, color=node_color, line=dict(width=2, color='black')),
485
+ hoverinfo='text',
486
+ hovertext=node_text)
487
+
488
+ fig = go.Figure(data=[edge_trace, node_trace],
489
+ layout=go.Layout(
490
+ title='Kit Dependency Network',
491
+ title_font_size=16,
492
+ showlegend=False,
493
+ hovermode='closest',
494
+ margin=dict(b=20,l=5,r=5,t=40),
495
+ annotations=[ dict(
496
+ text="Green=Prepack, Gold=Subkit, Red=Master",
497
+ showarrow=False,
498
+ xref="paper", yref="paper",
499
+ x=0.005, y=-0.002,
500
+ xanchor='left', yanchor='bottom',
501
+ font=dict(size=12)
502
+ )],
503
+ xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
504
+ yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))
505
+
506
+ return fig
507
+
508
+
509
+
510
+
511
+
512
+ def generate_sequence_suggestions(sequence_data):
513
+ """Generate optimization suggestions based on sequence analysis"""
514
+ suggestions = []
515
+
516
+ adherence = sequence_data.get('sequence_adherence_score', 0)
517
+ violations = sequence_data.get('early_productions', 0)
518
+
519
+ if adherence < 80:
520
+ suggestions.append(
521
+ "Consider adjusting production sequence to better follow hierarchy dependencies. "
522
+ "Current adherence is below optimal (80%)."
523
+ )
524
+
525
+ if violations > 0:
526
+ suggestions.append(
527
+ f"Found {violations} dependency violations. Review production scheduling to ensure "
528
+ "prepacks are produced before subkits, and subkits before masters."
529
+ )
530
+
531
+ if adherence >= 95:
532
+ suggestions.append(
533
+ "Excellent sequence adherence! Production is following optimal hierarchy flow."
534
+ )
535
+
536
+ if not suggestions:
537
+ suggestions.append("Production sequence analysis complete. No major issues detected.")
538
+
539
+ return suggestions
540
+
541
+ def get_hierarchy_level_summary(flow_data):
542
+ """Get summary statistics for each hierarchy level"""
543
+ summary = {}
544
+
545
+ for level_name in ['prepack', 'subkit', 'master']:
546
+ level_products = [row for row in flow_data if row['level_name'] == level_name]
547
+
548
+ summary[level_name] = {
549
+ 'count': len(set(row['product'] for row in level_products)),
550
+ 'total_units': sum(row['units'] for row in level_products),
551
+ 'total_hours': sum(row['hours'] for row in level_products)
552
+ }
553
+
554
+ return summary
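
All of the dashboard functions above consume a `results` dictionary produced by the optimizer. Below is a sketch of the minimal shape they expect, inferred from how prepare_hierarchy_flow_data() and prepare_hierarchy_analytics_data() index each run_schedule row; the concrete values are invented.

    example_results = {
        "objective": 12500.0,                     # total cost, used for cost efficiency
        "weekly_production": {"PREPACK_C": 400},  # units per product
        "run_schedule": [
            {
                "product": "PREPACK_C",
                "day": 1,            # planning day (1-based)
                "shift": 1,          # ShiftType code
                "line_type_id": 7,   # LineType code (7 = Mini Load)
                "line_idx": 1,
                "run_hours": 6.5,
                "units": 400,
            },
        ],
    }
    # display_hierarchy_operations_dashboard(example_results)  # call from within a Streamlit app
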
src/visualization/kit_relationships.py ADDED
@@ -0,0 +1,629 @@
1
+ """
2
+ Kit Relationship Visualization
3
+ Shows the actual dependency relationships between kits in production
4
+ based on kit_hierarchy.json data
5
+ """
6
+
7
+ import streamlit as st
8
+ import pandas as pd
9
+ import plotly.express as px
10
+ import plotly.graph_objects as go
11
+ from plotly.subplots import make_subplots
12
+ import json
13
+ import sys
14
+
15
+ from src.config.constants import ShiftType, LineType, KitLevel
16
+
17
+ # Optional networkx for advanced network layouts
18
+ try:
19
+ import networkx as nx
20
+ NETWORKX_AVAILABLE = True
21
+ except ImportError:
22
+ NETWORKX_AVAILABLE = False
23
+ nx = None
24
+
25
+ def load_kit_hierarchy():
26
+ """Load kit hierarchy data from JSON file"""
27
+ try:
28
+ with open('data/hierarchy_exports/kit_hierarchy.json', 'r') as f:
29
+ return json.load(f)
30
+ except FileNotFoundError:
31
+ st.error("Kit hierarchy file not found. Please ensure kit_hierarchy.json exists in data/hierarchy_exports/")
32
+ return {}
33
+ except json.JSONDecodeError:
34
+ st.error("Invalid kit hierarchy JSON format")
35
+ return {}
36
+
37
+ def display_kit_relationships_dashboard(results):
38
+ """Main dashboard showing kit relationships in production"""
39
+ st.header("🔗 Kit Relationship Dashboard")
40
+ st.markdown("Visualizing dependencies between kits being produced")
41
+ st.markdown("---")
42
+
43
+ # Load hierarchy data
44
+ hierarchy_data = load_kit_hierarchy()
45
+
46
+ if not hierarchy_data:
47
+ st.warning("No kit hierarchy data available")
48
+ return
49
+
50
+ # Get produced kits from results
51
+ produced_kits = set()
52
+ if 'weekly_production' in results:
53
+ produced_kits = set(results['weekly_production'].keys())
54
+ elif 'run_schedule' in results:
55
+ produced_kits = set(row['product'] for row in results['run_schedule'])
56
+
57
+ if not produced_kits:
58
+ st.warning("No production data available")
59
+ return
60
+
61
+ # Create tabs for different relationship views
62
+ tab1, tab2, tab3, tab4 = st.tabs([
63
+ "🌐 Dependency Network",
64
+ "📊 Relationship Matrix",
65
+ "🎯 Production Flow",
66
+ "⚠️ Dependency Analysis"
67
+ ])
68
+
69
+ with tab1:
70
+ display_dependency_network(hierarchy_data, produced_kits, results)
71
+
72
+ with tab2:
73
+ display_relationship_matrix(hierarchy_data, produced_kits, results)
74
+
75
+ with tab3:
76
+ display_production_flow_relationships(hierarchy_data, produced_kits, results)
77
+
78
+ with tab4:
79
+ display_dependency_analysis(hierarchy_data, produced_kits, results)
80
+
81
+ def display_dependency_network(hierarchy_data, produced_kits, results):
82
+ """Show interactive network graph of kit dependencies"""
83
+ st.subheader("🌐 Kit Dependency Network")
84
+ st.markdown("Interactive graph showing which kits depend on other kits")
85
+
86
+ # Build relationship data for produced kits only
87
+ relationships = build_relationship_data(hierarchy_data, produced_kits)
88
+
89
+ if not relationships:
90
+ st.info("No dependency relationships found between produced kits")
91
+ return
92
+
93
+ # Get production timing data
94
+ production_timing = get_production_timing(results)
95
+
96
+ # Create network visualization
97
+ col1, col2 = st.columns([3, 1])
98
+
99
+ with col1:
100
+ if NETWORKX_AVAILABLE:
101
+ fig = create_interactive_network_graph(relationships, production_timing)
102
+ st.plotly_chart(fig, use_container_width=True)
103
+ else:
104
+ fig = create_simple_dependency_chart(relationships, production_timing)
105
+ st.plotly_chart(fig, use_container_width=True)
106
+ st.info("💡 Install networkx for advanced network layouts: `pip install networkx`")
107
+
108
+ with col2:
109
+ # Network statistics
110
+ st.subheader("📈 Network Stats")
111
+
112
+ all_kits = set()
113
+ for rel in relationships:
114
+ all_kits.add(rel['source'])
115
+ all_kits.add(rel['target'])
116
+
117
+ st.metric("Total Kits", len(all_kits))
118
+ st.metric("Dependencies", len(relationships))
119
+
120
+ # Dependency depth analysis
121
+ max_depth = calculate_dependency_depth(relationships)
122
+ st.metric("Max Dependency Depth", max_depth)
123
+
124
+ # Most dependent kits
125
+ dependent_kits = get_most_dependent_kits(relationships)
126
+ st.subheader("🔗 Most Dependencies")
127
+ for kit, count in dependent_kits[:5]:
128
+ st.write(f"**{kit}**: {count} dependencies")
129
+
130
+ def display_relationship_matrix(hierarchy_data, produced_kits, results):
131
+ """Show dependency matrix heatmap"""
132
+ st.subheader("📊 Kit Dependency Matrix")
133
+ st.markdown("Heatmap showing which kits (rows) depend on which other kits (columns)")
134
+
135
+ # Build dependency matrix
136
+ matrix_data = build_dependency_matrix(hierarchy_data, produced_kits)
137
+
138
+ if matrix_data.empty:
139
+ st.info("No dependency relationships to visualize in matrix form")
140
+ return
141
+
142
+ # Create heatmap
143
+ fig = px.imshow(matrix_data.values,
144
+ x=matrix_data.columns,
145
+ y=matrix_data.index,
146
+ color_continuous_scale='Blues',
147
+ title='Kit Dependency Matrix (1 = depends on, 0 = no dependency)',
148
+ labels=dict(x="Dependency (what is needed)",
149
+ y="Kit (what depends on others)",
150
+ color="Dependency"))
151
+
152
+ fig.update_layout(height=600)
153
+ st.plotly_chart(fig, use_container_width=True)
154
+
155
+ # Show matrix as table
156
+ with st.expander("📋 View Dependency Matrix as Table"):
157
+ st.dataframe(matrix_data, use_container_width=True)
158
+
159
+ def display_production_flow_relationships(hierarchy_data, produced_kits, results):
160
+ """Show how relationships affect production timing"""
161
+ st.subheader("🎯 Production Flow with Relationships")
162
+ st.markdown("Timeline showing when dependent kits are produced")
163
+
164
+ # Get production timing and relationships
165
+ production_timing = get_production_timing(results)
166
+ relationships = build_relationship_data(hierarchy_data, produced_kits)
167
+
168
+ if not production_timing or not relationships:
169
+ st.info("Insufficient data for production flow analysis")
170
+ return
171
+
172
+ # Create timeline with dependency arrows
173
+ fig = create_production_timeline_with_dependencies(production_timing, relationships)
174
+ st.plotly_chart(fig, use_container_width=True)
175
+
176
+ # Timing analysis table
177
+ st.subheader("⏰ Dependency Timing Analysis")
178
+ timing_analysis = analyze_dependency_timing(production_timing, relationships)
179
+
180
+ if timing_analysis:
181
+ df = pd.DataFrame(timing_analysis)
182
+ st.dataframe(df, use_container_width=True)
183
+
184
+ def display_dependency_analysis(hierarchy_data, produced_kits, results):
185
+ """Analyze dependency fulfillment and violations"""
186
+ st.subheader("⚠️ Dependency Analysis & Violations")
187
+
188
+ production_timing = get_production_timing(results)
189
+ relationships = build_relationship_data(hierarchy_data, produced_kits)
190
+
191
+ # Analyze violations
192
+ violations = find_dependency_violations(production_timing, relationships)
193
+
194
+ # Summary metrics
195
+ col1, col2, col3, col4 = st.columns(4)
196
+
197
+ with col1:
198
+ total_deps = len(relationships)
199
+ st.metric("Total Dependencies", total_deps)
200
+
201
+ with col2:
202
+ violated_deps = len(violations)
203
+ st.metric("Violations", violated_deps,
204
+ delta=f"-{violated_deps}" if violated_deps > 0 else None)
205
+
206
+ with col3:
207
+ if total_deps > 0:
208
+ success_rate = ((total_deps - violated_deps) / total_deps) * 100
209
+ st.metric("Success Rate", f"{success_rate:.1f}%")
210
+ else:
211
+ st.metric("Success Rate", "N/A")
212
+
213
+ with col4:
214
+ if violations:
215
+ avg_violation = sum(v['days_early'] for v in violations) / len(violations)
216
+ st.metric("Avg Days Early", f"{avg_violation:.1f}")
217
+ else:
218
+ st.metric("Avg Days Early", "0")
219
+
220
+ # Violation details
221
+ if violations:
222
+ st.subheader("🚨 Dependency Violations")
223
+ st.markdown("Cases where kits were produced before their dependencies")
224
+
225
+ violation_df = pd.DataFrame(violations)
226
+
227
+ # Violation severity chart
228
+ fig = px.scatter(violation_df,
229
+ x='dependency_day', y='kit_day',
230
+ size='days_early', color='severity',
231
+ hover_data=['kit', 'dependency'],
232
+ title='Dependency Violations (Below diagonal = violation)',
233
+ labels={'dependency_day': 'When Dependency Was Made',
234
+ 'kit_day': 'When Kit Was Made'})
235
+
236
+ # Add diagonal line showing ideal timing
237
+ max_day = max(violation_df['dependency_day'].max(), violation_df['kit_day'].max())
238
+ fig.add_shape(type="line", x0=0, y0=0, x1=max_day, y1=max_day,
239
+ line=dict(dash="dash", color="green"),
240
+ name="Ideal Timeline")
241
+
242
+ st.plotly_chart(fig, use_container_width=True)
243
+
244
+ # Detailed violation table
245
+ st.dataframe(violation_df[['kit', 'dependency', 'kit_day', 'dependency_day',
246
+ 'days_early', 'severity']], use_container_width=True)
247
+ else:
248
+ st.success("🎉 No dependency violations found! All kits produced in correct order.")
249
+
250
+ # Recommendations
251
+ st.subheader("💡 Recommendations")
252
+ recommendations = generate_dependency_recommendations(violations, relationships, production_timing)
253
+ for rec in recommendations:
254
+ st.info(f"💡 {rec}")
255
+
256
+ # Helper Functions
257
+
258
+ def build_relationship_data(hierarchy_data, produced_kits):
259
+ """Build relationship data for visualization"""
260
+ relationships = []
261
+
262
+ for kit_id, kit_info in hierarchy_data.items():
263
+ if kit_id not in produced_kits:
264
+ continue
265
+
266
+ # Add direct dependencies
267
+ dependencies = kit_info.get('dependencies', [])
268
+ for dep in dependencies:
269
+ if dep in produced_kits: # Only show relationships between produced kits
270
+ relationships.append({
271
+ 'source': dep, # Dependency (what's needed)
272
+ 'target': kit_id, # Kit that depends on it
273
+ 'type': 'direct',
274
+ 'source_type': hierarchy_data.get(dep, {}).get('type', 'unknown'),
275
+ 'target_type': kit_info.get('type', 'unknown')
276
+ })
277
+
278
+ return relationships
279
+
280
+ def build_dependency_matrix(hierarchy_data, produced_kits):
281
+ """Build dependency matrix for heatmap"""
282
+ produced_list = sorted(list(produced_kits))
283
+
284
+ if len(produced_list) == 0:
285
+ return pd.DataFrame()
286
+
287
+ # Initialize matrix
288
+ matrix = pd.DataFrame(0, index=produced_list, columns=produced_list)
289
+
290
+ # Fill matrix with dependencies
291
+ for kit_id in produced_list:
292
+ kit_info = hierarchy_data.get(kit_id, {})
293
+ dependencies = kit_info.get('dependencies', [])
294
+
295
+ for dep in dependencies:
296
+ if dep in produced_list:
297
+ matrix.loc[kit_id, dep] = 1 # kit_id depends on dep
298
+
299
+ return matrix
300
+
301
+ def get_production_timing(results):
302
+ """Extract production timing for each kit"""
303
+ timing = {}
304
+
305
+ if 'run_schedule' in results:
306
+ for run in results['run_schedule']:
307
+ kit = run['product']
308
+ day = run['day']
309
+
310
+ # Use earliest day if kit is produced multiple times
311
+ if kit not in timing or day < timing[kit]:
312
+ timing[kit] = day
313
+
314
+ return timing
315
+
316
+ def create_interactive_network_graph(relationships, production_timing):
317
+ """Create interactive network graph using NetworkX layout"""
318
+ if not NETWORKX_AVAILABLE:
319
+ return create_simple_dependency_chart(relationships, production_timing)
320
+
321
+ # Create NetworkX graph
322
+ G = nx.DiGraph()
323
+
324
+ # Add edges (relationships)
325
+ for rel in relationships:
326
+ G.add_edge(rel['source'], rel['target'], type=rel['type'])
327
+
328
+ if len(G.nodes()) == 0:
329
+ return go.Figure().add_annotation(
330
+ text="No relationships to display",
331
+ xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False
332
+ )
333
+
334
+ # Calculate layout
335
+ pos = nx.spring_layout(G, k=3, iterations=50)
336
+
337
+ # Create edge traces
338
+ edge_x, edge_y = [], []
339
+ edge_info = []
340
+
341
+ for edge in G.edges():
342
+ source, target = edge
343
+ x0, y0 = pos[source]
344
+ x1, y1 = pos[target]
345
+
346
+ edge_x.extend([x0, x1, None])
347
+ edge_y.extend([y0, y1, None])
348
+
349
+ # Add arrow annotation
350
+ edge_info.append({
351
+ 'x': (x0 + x1) / 2,
352
+ 'y': (y0 + y1) / 2,
353
+ 'text': '→',
354
+ 'source': source,
355
+ 'target': target
356
+ })
357
+
358
+ edge_trace = go.Scatter(x=edge_x, y=edge_y,
359
+ line=dict(width=2, color='#888'),
360
+ hoverinfo='none',
361
+ mode='lines')
362
+
363
+ # Create node traces
364
+ node_x, node_y, node_text, node_color, node_size = [], [], [], [], []
365
+ node_info = []
366
+
367
+ for node in G.nodes():
368
+ x, y = pos[node]
369
+ node_x.append(x)
370
+ node_y.append(y)
371
+
372
+ # Node size based on number of connections
373
+ in_degree = G.in_degree(node)
374
+ out_degree = G.out_degree(node)
375
+ total_degree = in_degree + out_degree
376
+ node_size.append(20 + total_degree * 5)
377
+
378
+ # Color by production timing
379
+ prod_day = production_timing.get(node, 0)
380
+ if prod_day == 1:
381
+ node_color.append('#90EE90') # Light green for early
382
+ elif prod_day <= 3:
383
+ node_color.append('#FFD700') # Gold for middle
384
+ else:
385
+ node_color.append('#FF6347') # Tomato for late
386
+
387
+ # Node text and info
388
+ short_name = node[:12] + "..." if len(node) > 12 else node
389
+ node_text.append(short_name)
390
+
391
+ node_info.append(f"{node}<br>Day: {prod_day}<br>In: {in_degree}, Out: {out_degree}")
392
+
393
+ node_trace = go.Scatter(x=node_x, y=node_y,
394
+ mode='markers+text',
395
+ text=node_text,
396
+ textposition='middle center',
397
+ hovertext=node_info,
398
+ hoverinfo='text',
399
+ marker=dict(size=node_size,
400
+ color=node_color,
401
+ line=dict(width=2, color='black')))
402
+
403
+ # Create figure
404
+ fig = go.Figure(data=[edge_trace, node_trace],
405
+ layout=go.Layout(
406
+ title='Kit Dependency Network (Size=Connections, Color=Production Day)',
407
+ showlegend=False,
408
+ hovermode='closest',
409
+ margin=dict(b=20,l=5,r=5,t=40),
410
+ annotations=[
411
+ dict(text="Green=Early, Gold=Middle, Red=Late production",
412
+ showarrow=False,
413
+ xref="paper", yref="paper",
414
+ x=0.005, y=-0.002,
415
+ xanchor='left', yanchor='bottom',
416
+ font=dict(size=12))
417
+ ],
418
+ xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
419
+ yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))
420
+
421
+ return fig
422
+
423
+ def create_simple_dependency_chart(relationships, production_timing):
424
+ """Create simple dependency chart without NetworkX"""
425
+ if not relationships:
426
+ return go.Figure().add_annotation(
427
+ text="No dependencies to display",
428
+ xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False
429
+ )
430
+
431
+ # Create a simple directed graph visualization
432
+ # Group kits by their role (sources, targets)
433
+ sources = set(rel['source'] for rel in relationships)
434
+ targets = set(rel['target'] for rel in relationships)
435
+
436
+ # Create positions
437
+ all_kits = list(sources | targets)
438
+ positions = {kit: (i, production_timing.get(kit, 0)) for i, kit in enumerate(all_kits)}
439
+
440
+ # Create traces
441
+ edge_x, edge_y = [], []
442
+ for rel in relationships:
443
+ source_pos = positions[rel['source']]
444
+ target_pos = positions[rel['target']]
445
+
446
+ edge_x.extend([source_pos[0], target_pos[0], None])
447
+ edge_y.extend([source_pos[1], target_pos[1], None])
448
+
449
+ # Edge trace
450
+ edge_trace = go.Scatter(x=edge_x, y=edge_y,
451
+ line=dict(width=2, color='#888'),
452
+ hoverinfo='none',
453
+ mode='lines')
454
+
455
+ # Node trace
456
+ node_x = [positions[kit][0] for kit in all_kits]
457
+ node_y = [positions[kit][1] for kit in all_kits]
458
+ node_text = [kit[:10] + "..." if len(kit) > 10 else kit for kit in all_kits]
459
+
460
+ node_trace = go.Scatter(x=node_x, y=node_y,
461
+ mode='markers+text',
462
+ text=node_text,
463
+ textposition='top center',
464
+ marker=dict(size=15, color='lightblue',
465
+ line=dict(width=2, color='black')),
466
+ hovertext=all_kits,
467
+ hoverinfo='text')
468
+
469
+ fig = go.Figure(data=[edge_trace, node_trace],
470
+ layout=go.Layout(
471
+ title='Kit Dependencies (Y-axis = Production Day)',
472
+ showlegend=False,
473
+ xaxis=dict(title='Kits'),
474
+ yaxis=dict(title='Production Day')))
475
+
476
+ return fig
477
+
478
+ def create_production_timeline_with_dependencies(production_timing, relationships):
479
+ """Create timeline showing production order with dependency arrows"""
480
+ if not production_timing:
481
+ return go.Figure()
482
+
483
+ # Prepare data
484
+ timeline_data = []
485
+ for kit, day in production_timing.items():
486
+ timeline_data.append({
487
+ 'Kit': kit,
488
+ 'Day': day,
489
+ 'Short_Name': kit[:15] + "..." if len(kit) > 15 else kit
490
+ })
491
+
492
+ df = pd.DataFrame(timeline_data)
493
+
494
+ # Create scatter plot
495
+ fig = px.scatter(df, x='Day', y='Kit',
496
+ hover_data=['Kit'],
497
+ title='Production Timeline with Dependencies')
498
+
499
+ # Add dependency arrows
500
+ for rel in relationships:
501
+ source_day = production_timing.get(rel['source'], 0)
502
+ target_day = production_timing.get(rel['target'], 0)
503
+
504
+ # Add arrow if both kits are in timeline
505
+ if source_day > 0 and target_day > 0:
506
+ fig.add_annotation(
507
+ x=target_day, y=rel['target'],
508
+ ax=source_day, ay=rel['source'],
509
+ arrowhead=2, arrowsize=1, arrowwidth=2,
510
+ arrowcolor="red" if source_day > target_day else "green"
511
+ )
512
+
513
+ fig.update_layout(height=max(400, len(df) * 20))
514
+ return fig
515
+
516
+ def calculate_dependency_depth(relationships):
517
+ """Calculate maximum dependency depth"""
518
+ if not NETWORKX_AVAILABLE or not relationships:
519
+ return 0
520
+
521
+ G = nx.DiGraph()
522
+ for rel in relationships:
523
+ G.add_edge(rel['source'], rel['target'])
524
+
525
+ try:
526
+ return nx.dag_longest_path_length(G)
527
+ except nx.NetworkXUnfeasible:  # the dependency graph contains a cycle, so no DAG depth
528
+ return 0
529
+
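+ # Worked example (illustrative kit names): a chain PREPACK_A -> SUBKIT_B -> MASTER_C
+ # has a longest path of two edges, so the depth is 2; a cyclic graph falls back to 0.
+ # calculate_dependency_depth([
+ #     {'source': 'PREPACK_A', 'target': 'SUBKIT_B'},
+ #     {'source': 'SUBKIT_B', 'target': 'MASTER_C'}])   # -> 2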
530
+ def get_most_dependent_kits(relationships):
531
+ """Get kits with most dependencies"""
532
+ dependency_counts = {}
533
+
534
+ for rel in relationships:
535
+ target = rel['target']
536
+ dependency_counts[target] = dependency_counts.get(target, 0) + 1
537
+
538
+ return sorted(dependency_counts.items(), key=lambda x: x[1], reverse=True)
539
+
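+ # Worked example (illustrative data): each relationship counts once against its
+ # target, so a master kit fed by two prepacks is listed first.
+ # get_most_dependent_kits([
+ #     {'source': 'PREPACK_A', 'target': 'MASTER_1'},
+ #     {'source': 'PREPACK_B', 'target': 'MASTER_1'},
+ #     {'source': 'PREPACK_A', 'target': 'MASTER_2'}])
+ # -> [('MASTER_1', 2), ('MASTER_2', 1)]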
540
+ def find_dependency_violations(production_timing, relationships):
541
+ """Find cases where kits were produced before their dependencies"""
542
+ violations = []
543
+
544
+ for rel in relationships:
545
+ source = rel['source'] # dependency
546
+ target = rel['target'] # kit that depends on it
547
+
548
+ source_day = production_timing.get(source, 0)
549
+ target_day = production_timing.get(target, 0)
550
+
551
+ if source_day > 0 and target_day > 0 and source_day > target_day:
552
+ days_early = source_day - target_day
553
+ severity = 'high' if days_early > 2 else 'medium' if days_early > 1 else 'low'
554
+
555
+ violations.append({
556
+ 'kit': target,
557
+ 'dependency': source,
558
+ 'kit_day': target_day,
559
+ 'dependency_day': source_day,
560
+ 'days_early': days_early,
561
+ 'severity': severity
562
+ })
563
+
564
+ return violations
565
+
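+ # Worked example (made-up schedule): MASTER_1 is scheduled on day 2 but its
+ # dependency PREPACK_A only on day 5, so the kit is 3 days early -> severity 'high'.
+ # find_dependency_violations(
+ #     production_timing={'PREPACK_A': 5, 'MASTER_1': 2},
+ #     relationships=[{'source': 'PREPACK_A', 'target': 'MASTER_1'}])
+ # -> [{'kit': 'MASTER_1', 'dependency': 'PREPACK_A', 'kit_day': 2,
+ #      'dependency_day': 5, 'days_early': 3, 'severity': 'high'}]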
566
+ def analyze_dependency_timing(production_timing, relationships):
567
+ """Analyze timing of all dependency relationships"""
568
+ timing_analysis = []
569
+
570
+ for rel in relationships:
571
+ source = rel['source']
572
+ target = rel['target']
573
+
574
+ source_day = production_timing.get(source, 0)
575
+ target_day = production_timing.get(target, 0)
576
+
577
+ if source_day > 0 and target_day > 0:
578
+ timing_diff = target_day - source_day
579
+ status = "✅ Correct" if timing_diff >= 0 else "❌ Violation"
580
+
581
+ timing_analysis.append({
582
+ 'Kit': target[:20] + "..." if len(target) > 20 else target,
583
+ 'Dependency': source[:20] + "..." if len(source) > 20 else source,
584
+ 'Kit Day': target_day,
585
+ 'Dep Day': source_day,
586
+ 'Gap (Days)': timing_diff,
587
+ 'Status': status
588
+ })
589
+
590
+ return sorted(timing_analysis, key=lambda x: x['Gap (Days)'])
591
+
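+ # Worked example (same made-up schedule): the gap is kit day minus dependency day,
+ # so 2 - 5 = -3 and the row is flagged as a violation; gaps >= 0 are correct.
+ # analyze_dependency_timing(
+ #     {'PREPACK_A': 5, 'MASTER_1': 2},
+ #     [{'source': 'PREPACK_A', 'target': 'MASTER_1'}])
+ # -> [{'Kit': 'MASTER_1', 'Dependency': 'PREPACK_A', 'Kit Day': 2,
+ #      'Dep Day': 5, 'Gap (Days)': -3, 'Status': '❌ Violation'}]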
592
+ def generate_dependency_recommendations(violations, relationships, production_timing):
593
+ """Generate recommendations based on dependency analysis"""
594
+ recommendations = []
595
+
596
+ if not violations:
597
+ recommendations.append("Excellent! All dependencies are being fulfilled in the correct order.")
598
+ return recommendations
599
+
600
+ # Group violations by severity
601
+ high_severity = [v for v in violations if v['severity'] == 'high']
602
+ medium_severity = [v for v in violations if v['severity'] == 'medium']
603
+
604
+ if high_severity:
605
+ recommendations.append(
606
+ f"🚨 High Priority: {len(high_severity)} critical dependency violations found. "
607
+ "Consider rescheduling production to ensure dependencies are produced first."
608
+ )
609
+
610
+ if medium_severity:
611
+ recommendations.append(
612
+ f"⚠️ Medium Priority: {len(medium_severity)} moderate dependency timing issues. "
613
+ "Review production sequence for optimization opportunities."
614
+ )
615
+
616
+ # Most problematic kits
617
+ problem_kits = {}
618
+ for v in violations:
619
+ kit = v['kit']
620
+ problem_kits[kit] = problem_kits.get(kit, 0) + 1
621
+
622
+ if problem_kits:
623
+ worst_kit = max(problem_kits.items(), key=lambda x: x[1])
624
+ recommendations.append(
625
+ f"🎯 Focus Area: Kit {worst_kit[0]} has {worst_kit[1]} dependency issues. "
626
+ "Consider moving its production later in the schedule."
627
+ )
628
+
629
+ return recommendations
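+ # End-to-end sketch (illustrative only): the violation list produced above feeds
+ # straight into the recommendation text.
+ # violations = find_dependency_violations(production_timing, relationships)
+ # for line in generate_dependency_recommendations(violations, relationships, production_timing):
+ #     print(line)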