Halimhailey committed on
Commit ffa1f50 · verified · 1 Parent(s): f23946e

Upload folder using huggingface_hub

src/.DS_Store ADDED
Binary file (6.15 kB). View file
 
src/.env ADDED
@@ -0,0 +1,7 @@
1
+ POSTGRES_USER=halim
2
+ POSTGRES_PASSWORD=haileyhalimunicef
3
+ POSTGRES_DB=rosteroptimization
4
+ PGADMIN_EMAIL=admin@example.com
5
+ PGADMIN_PASSWORD=admin
6
+ DB_PORT=5432
7
+ PGADMIN_PORT=5050
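The variables in src/.env configure the Postgres database and pgAdmin for the stack. The commit does not show how they are consumed, so the following is only a minimal sketch, assuming the variables have been exported into the environment (for example via a docker-compose env_file); the helper name and the DB_HOST fallback are hypothetical and not part of this commit.

# Illustrative only: build a Postgres DSN from the variables defined in src/.env.
import os

def build_postgres_dsn() -> str:
    user = os.environ.get("POSTGRES_USER", "halim")
    password = os.environ.get("POSTGRES_PASSWORD", "")
    db = os.environ.get("POSTGRES_DB", "rosteroptimization")
    port = os.environ.get("DB_PORT", "5432")
    host = os.environ.get("DB_HOST", "localhost")  # DB_HOST is an assumption; it is not in the .env
    return f"postgresql://{user}:{password}@{host}:{port}/{db}"

if __name__ == "__main__":
    print(build_postgres_dsn())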
src/__init__.py ADDED
@@ -0,0 +1 @@
1
+
src/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (152 Bytes). View file
 
src/__pycache__/demand_filtering.cpython-310.pyc ADDED
Binary file (12.3 kB). View file
 
src/__pycache__/demand_validation_viz.cpython-310.pyc ADDED
Binary file (9.85 kB). View file
 
src/config/__init__.py ADDED
File without changes
src/config/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (159 Bytes). View file
 
src/config/__pycache__/constants.cpython-310.pyc ADDED
Binary file (5.3 kB). View file
 
src/config/__pycache__/optimization_config.cpython-310.pyc ADDED
Binary file (12.3 kB). View file
 
src/config/constants.py ADDED
@@ -0,0 +1,202 @@
1
+ """
2
+ Constants module for Supply Roster Optimization Tool
3
+ Replaces hard-coded magic numbers with meaningful named constants
4
+ """
5
+ from src.preprocess import extract
6
+
7
+ class ShiftType:
8
+ """
9
+ Shift type constants to replace magic numbers
10
+ 1 = Regular, 2 = Evening, 3 = Overtime
11
+ """
12
+ REGULAR = 1
13
+ EVENING = 2
14
+ OVERTIME = 3
15
+
16
+ # All available shifts
17
+ ALL_SHIFTS = [REGULAR, EVENING, OVERTIME]
18
+
19
+ # Common shift combinations
20
+ REGULAR_AND_OVERTIME = [REGULAR, OVERTIME] # Normal mode (no evening)
21
+
22
+ @classmethod
23
+ def get_name(cls, shift_id):
24
+ """Get human-readable name for shift ID"""
25
+ names = {
26
+ cls.REGULAR: "Regular",
27
+ cls.EVENING: "Evening",
28
+ cls.OVERTIME: "Overtime"
29
+ }
30
+ return names.get(shift_id, "Unknown")
31
+
32
+ @classmethod
33
+ def get_all_names(cls):
34
+ """Get dictionary mapping shift IDs to names"""
35
+ return {
36
+ cls.REGULAR: "Regular",
37
+ cls.EVENING: "Evening",
38
+ cls.OVERTIME: "Overtime"
39
+ }
40
+
41
+ class LineType:
42
+ """
43
+ Line type constants to replace magic numbers
44
+ 6 = Long Line, 7 = Mini Load
45
+ """
46
+ LONG_LINE = 6
47
+ MINI_LOAD = 7
48
+
49
+ # All available line types
50
+ ALL_LINE_TYPES = [LONG_LINE, MINI_LOAD]
51
+
52
+ @classmethod
53
+ def get_name(cls, line_id):
54
+ """Get human-readable name for line type ID"""
55
+ names = {
56
+ cls.LONG_LINE: "Long Line",
57
+ cls.MINI_LOAD: "Mini Load"
58
+ }
59
+ return names.get(line_id, "Unknown")
60
+
61
+ @classmethod
62
+ def get_all_names(cls):
63
+ """Get dictionary mapping line type IDs to names"""
64
+ return {
65
+ cls.LONG_LINE: "Long Line",
66
+ cls.MINI_LOAD: "Mini Load"
67
+ }
68
+
69
+ class KitLevel:
70
+ """
71
+ Kit hierarchy level constants
72
+ 0 = Prepack, 1 = Subkit, 2 = Master
73
+ """
74
+ PREPACK = 0
75
+ SUBKIT = 1
76
+ MASTER = 2
77
+
78
+ # All available levels
79
+ ALL_LEVELS = [PREPACK, SUBKIT, MASTER]
80
+
81
+ @classmethod
82
+ def get_name(cls, level_id):
83
+ """Get human-readable name for kit level ID"""
84
+ names = {
85
+ cls.PREPACK: "prepack",
86
+ cls.SUBKIT: "subkit",
87
+ cls.MASTER: "master"
88
+ }
89
+ return names.get(level_id, "unknown")
90
+
91
+ @classmethod
92
+ def get_all_names(cls):
93
+ """Get dictionary mapping level IDs to names"""
94
+ return {
95
+ cls.PREPACK: "prepack",
96
+ cls.SUBKIT: "subkit",
97
+ cls.MASTER: "master"
98
+ }
99
+
100
+ # Removed get_timing_weight method - no longer needed
101
+ # Dependency ordering is now handled by topological sorting
102
+
103
+ class PaymentMode:
104
+ """
105
+ Payment mode constants
106
+ """
107
+ BULK = "bulk"
108
+ PARTIAL = "partial"
109
+
110
+ @classmethod
111
+ def get_all_modes(cls):
112
+ """Get all available payment modes"""
113
+ return [cls.BULK, cls.PARTIAL]
114
+
115
+ # Default configurations using constants
116
+ class DefaultConfig:
117
+ """Default configuration values using constants"""
118
+
119
+ # Default payment modes by shift
120
+ PAYMENT_MODE_CONFIG = {
121
+ ShiftType.REGULAR: PaymentMode.BULK,
122
+ ShiftType.EVENING: PaymentMode.BULK,
123
+ ShiftType.OVERTIME: PaymentMode.PARTIAL
124
+ }
125
+
126
+ # Default max hours per shift per person
127
+ MAX_HOUR_PER_SHIFT_PER_PERSON = {
128
+ ShiftType.REGULAR: 7.5,
129
+ ShiftType.EVENING: 7.5,
130
+ ShiftType.OVERTIME: 5
131
+ }
132
+
133
+ # Default max parallel workers per line type
134
+ MAX_PARALLEL_WORKERS = {
135
+ LineType.LONG_LINE: 15,
136
+ LineType.MINI_LOAD: 15
137
+ }
138
+
139
+ # Default minimum UNICEF fixed-term employees per day
140
+ FIXED_MIN_UNICEF_PER_DAY = 2
141
+
142
+ # Default line counts
143
+ LINE_COUNT_LONG_LINE = 3
144
+ LINE_COUNT_MINI_LOAD = 2
145
+
146
+ # Default max parallel workers per line (for UI)
147
+ MAX_PARALLEL_WORKERS_LONG_LINE = 7
148
+ MAX_PARALLEL_WORKERS_MINI_LOAD = 5
149
+
150
+ # Default cost rates (example values)
151
+ DEFAULT_COST_RATES = {
152
+ "UNICEF Fixed term": {
153
+ ShiftType.REGULAR: 43.27,
154
+ ShiftType.EVENING: 43.27,
155
+ ShiftType.OVERTIME: 64.91
156
+ },
157
+ "Humanizer": {
158
+ ShiftType.REGULAR: 27.94,
159
+ ShiftType.EVENING: 27.94,
160
+ ShiftType.OVERTIME: 41.91
161
+ }
162
+ }
163
+ # Get employee type list from data files
164
+ EMPLOYEE_TYPE_LIST = extract.read_employee_data()["employment_type"].unique().tolist()
165
+ SHIFT_LIST = extract.get_shift_info()["id"].unique().tolist()
166
+ EVENING_SHIFT_MODE = "normal"
167
+ EVENING_SHIFT_DEMAND_THRESHOLD = 0.9
168
+
169
+ # Default schedule type
170
+ SCHEDULE_TYPE = "weekly"
171
+
172
+ # Default fixed staff mode
173
+ FIXED_STAFF_MODE = "priority"
174
+
175
+ # Default hourly rates for UI (simplified)
176
+ UNICEF_RATE_SHIFT_1 = 12.5
177
+ UNICEF_RATE_SHIFT_2 = 15.0
178
+ UNICEF_RATE_SHIFT_3 = 18.75
179
+ HUMANIZER_RATE_SHIFT_1 = 10.0
180
+ HUMANIZER_RATE_SHIFT_2 = 12.0
181
+ HUMANIZER_RATE_SHIFT_3 = 15.0
182
+ LINE_LIST = extract.read_packaging_line_data()["id"].unique().tolist()
183
+ LINE_CNT_PER_TYPE = extract.read_packaging_line_data().set_index("id")["line_count"].to_dict()
184
+
185
+ # Dynamic method to get max employee per type on day
186
+ @staticmethod
187
+ def get_max_employee_per_type_on_day(date_span):
188
+ """Get max employee per type configuration for given date span"""
189
+ return {
190
+ "UNICEF Fixed term": {
191
+ t: 8 for t in date_span
192
+ },
193
+ "Humanizer": {
194
+ t: 10 for t in date_span
195
+ }
196
+ }
197
+ MAX_UNICEF_PER_DAY = 8
198
+ MAX_HUMANIZER_PER_DAY = 10
199
+ MAX_HOUR_PER_PERSON_PER_DAY = 14
200
+ KIT_LEVELS, KIT_DEPENDENCIES, PRODUCTION_PRIORITY_ORDER = extract.get_production_order_data()
201
+
202
+
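The PaymentMode constants work together with DefaultConfig.MAX_HOUR_PER_SHIFT_PER_PERSON and DEFAULT_COST_RATES: "bulk" pays the full shift as soon as any hours are worked, while "partial" pays only the hours actually worked (see get_payment_mode_config in optimization_config.py). A small self-contained sketch of that pricing rule follows; the literal rates and hours are copied from the defaults above, and the shift_cost helper itself is illustrative, not part of the module.

# Illustrative sketch of the bulk/partial payment semantics; values copied from DefaultConfig.
REGULAR, EVENING, OVERTIME = 1, 2, 3
MAX_HOURS = {REGULAR: 7.5, EVENING: 7.5, OVERTIME: 5}
RATES = {
    "UNICEF Fixed term": {REGULAR: 43.27, EVENING: 43.27, OVERTIME: 64.91},
    "Humanizer": {REGULAR: 27.94, EVENING: 27.94, OVERTIME: 41.91},
}
PAYMENT_MODE = {REGULAR: "bulk", EVENING: "bulk", OVERTIME: "partial"}

def shift_cost(emp_type: str, shift: int, hours_worked: float) -> float:
    """Bulk mode pays the full shift once any hours are worked; partial pays actual hours."""
    rate = RATES[emp_type][shift]
    paid_hours = MAX_HOURS[shift] if PAYMENT_MODE[shift] == "bulk" else hours_worked
    return rate * paid_hours

print(shift_cost("Humanizer", REGULAR, 3))   # 27.94 * 7.5 = 209.55 (bulk)
print(shift_cost("Humanizer", OVERTIME, 3))  # 41.91 * 3   = 125.73 (partial)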
src/config/optimization_config.py ADDED
@@ -0,0 +1,450 @@
1
+ import pandas as pd
2
+ import src.preprocess.transform as transformed_data
3
+ import datetime
4
+ from datetime import timedelta
5
+ import src.preprocess.extract as extract
6
+ from src.config.constants import ShiftType, LineType, KitLevel, DefaultConfig
7
+
8
+ # Re-import all the packages
9
+ import importlib
10
+
11
+ # Reload modules to get latest changes - REMOVED to prevent infinite loops
12
+ # importlib.reload(extract)
13
+ # importlib.reload(transformed_data) # Uncomment if needed
14
+
15
+
16
+ def get_date_span():
17
+ """Get date span from streamlit session state, or return default"""
18
+ try:
19
+ import streamlit as st
20
+ if hasattr(st, 'session_state'):
21
+ # Get from session state without printing (avoid spam)
22
+ if 'start_date' in st.session_state and 'planning_days' in st.session_state:
23
+ from datetime import datetime, timedelta
24
+ start_date = datetime.combine(st.session_state.start_date, datetime.min.time())
25
+ planning_days = st.session_state.planning_days
26
+ end_date = start_date + timedelta(days=planning_days - 1)
27
+ date_span = list(range(1, planning_days + 1))
28
+ return date_span, start_date, end_date
29
+ except Exception:
30
+ pass
31
+
32
+ # Default values - no printing to avoid spam
33
+ from datetime import datetime
34
+ return list(range(1, 6)), datetime(2025, 7, 7), datetime(2025, 7, 11)
35
+
36
+
37
+ # Only call get_date_span() when explicitly needed - avoid module-level execution
38
+ # DATE_SPAN, start_date, end_date = get_date_span() # REMOVED - called dynamically instead
39
+ DATE_SPAN = None
40
+ start_date = None
41
+ end_date = None
42
+
43
+ def get_product_list():
44
+ """Get filtered product list without printing spam"""
45
+ try:
46
+ from src.demand_filtering import DemandFilter
47
+ filter_instance = DemandFilter()
48
+ filter_instance.load_data(force_reload=True)
49
+ return filter_instance.get_filtered_product_list()
50
+ except Exception:
51
+ # Fallback: get from session state start_date
52
+ date_span, start_date, end_date = get_date_span()
53
+ return transformed_data.get_released_product_list(start_date)
54
+
55
+
56
+ def get_employee_type_list():
57
+ """Get employee type list from session state or default"""
58
+ try:
59
+ import streamlit as st
60
+ if hasattr(st, 'session_state') and 'selected_employee_types' in st.session_state:
61
+ return st.session_state.selected_employee_types
62
+ except Exception:
63
+ pass
64
+
65
+ # Default: load from data files
66
+ employee_type_list = extract.read_employee_data()
67
+ return employee_type_list["employment_type"].unique().tolist()
68
+
69
+
70
+ def get_shift_list():
71
+ """Get shift list from session state or default"""
72
+ try:
73
+ import streamlit as st
74
+ if hasattr(st, 'session_state') and 'selected_shifts' in st.session_state:
75
+ return st.session_state.selected_shifts
76
+ except Exception:
77
+ pass
78
+
79
+ # Default: load from data files
80
+ shift_list = extract.get_shift_info()
81
+ return shift_list["id"].unique().tolist()
82
+
83
+ # Evening shift activation mode - define early to avoid circular dependency
84
+ # Options:
85
+ # "normal" - Only use regular shift (1) and overtime shift (3) - NO evening shift
86
+ # "activate_evening" - Allow evening shift (2) when demand is too high or cost-effective
87
+ # "always_available" - Evening shift always available as option
88
+ EVENING_SHIFT_MODE = "normal" # Default: only regular + overtime
89
+
90
+ # Evening shift activation threshold
91
+ # If demand cannot be met with regular + overtime, suggest evening shift activation
92
+ EVENING_SHIFT_DEMAND_THRESHOLD = 0.9 # Activate if regular+overtime capacity < 90% of demand
93
+
94
+ #Where?
95
+ def get_active_shift_list():
96
+ """
97
+ Get the list of active shifts based on EVENING_SHIFT_MODE setting.
98
+ """
99
+ all_shifts = get_shift_list()
100
+
101
+ if EVENING_SHIFT_MODE == "normal":
102
+ # Only regular and overtime shifts - NO evening shift
103
+ active_shifts = [s for s in all_shifts if s in ShiftType.REGULAR_AND_OVERTIME]
104
+ print(f"[SHIFT MODE] Normal mode: Using shifts {active_shifts} (Regular + Overtime only, NO evening)")
105
+
106
+ elif EVENING_SHIFT_MODE == "activate_evening":
107
+ # All shifts including evening (2)
108
+ active_shifts = list(all_shifts)
109
+ print(f"[SHIFT MODE] Evening activated: Using all shifts {active_shifts}")
110
+
111
+ elif EVENING_SHIFT_MODE == "always_available":
112
+ # All shifts always available
113
+ active_shifts = list(all_shifts)
114
+ print(f"[SHIFT MODE] Always available: Using all shifts {active_shifts}")
115
+
116
+ else:
117
+ # Default to normal mode
118
+ active_shifts = [s for s in all_shifts if s in ShiftType.REGULAR_AND_OVERTIME]
119
+ print(f"[SHIFT MODE] Unknown mode '{EVENING_SHIFT_MODE}', defaulting to normal: {active_shifts}")
120
+
121
+ return active_shifts
122
+
123
+ # DO NOT load at import time - always call get_active_shift_list() dynamically
124
+ # SHIFT_LIST = get_active_shift_list() # REMOVED - was causing stale data!
125
+
126
+ #where?
127
+ def get_line_list():
128
+ """Get line list - try from streamlit session state first, then from data files"""
129
+ try:
130
+ # Try to get from streamlit session state (from Dataset Metadata page)
131
+ import streamlit as st
132
+ if hasattr(st, 'session_state') and 'selected_lines' in st.session_state:
133
+ print(f"Using lines from Dataset Metadata page: {st.session_state.selected_lines}")
134
+ return st.session_state.selected_lines
135
+ except Exception as e:
136
+ print(f"Could not get lines from streamlit session: {e}")
137
+
138
+ # Default: load from data files
139
+ print(f"Loading line list from data files")
140
+ line_df = extract.read_packaging_line_data()
141
+ line_list = line_df["id"].unique().tolist()
142
+ return line_list
143
+
144
+ # DO NOT load at import time - always call get_line_list() dynamically
145
+ # LINE_LIST = get_line_list() # REMOVED - was causing stale data!
146
+
147
+ #where?
148
+ def get_kit_line_match():
149
+ kit_line_match = extract.read_kit_line_match_data()
150
+ kit_line_match_dict = kit_line_match.set_index("kit_name")["line_type"].to_dict()
151
+
152
+ # Create line name to ID mapping
153
+ line_name_to_id = {
154
+ "long line": LineType.LONG_LINE,
155
+ "mini load": LineType.MINI_LOAD,
156
+ "miniload": LineType.MINI_LOAD, # Alternative naming (no space)
157
+ "Long_line": LineType.LONG_LINE, # Alternative naming
158
+ "Mini_load": LineType.MINI_LOAD, # Alternative naming
159
+ }
160
+
161
+ # Convert string line names to numeric IDs
162
+ converted_dict = {}
163
+ for kit, line_name in kit_line_match_dict.items():
164
+ if isinstance(line_name, str) and line_name.strip():
165
+ # Convert string names to numeric IDs
166
+ line_id = line_name_to_id.get(line_name.strip(), None)
167
+ if line_id is not None:
168
+ converted_dict[kit] = line_id
169
+ else:
170
+ print(f"Warning: Unknown line type '{line_name}' for kit {kit}")
171
+ # Default to long line if unknown
172
+ converted_dict[kit] = LineType.LONG_LINE
173
+ elif isinstance(line_name, (int, float)) and not pd.isna(line_name):
174
+ # Already numeric
175
+ converted_dict[kit] = int(line_name)
176
+ else:
177
+ # Missing or empty line type - skip (no production needed for non-standalone masters)
178
+ pass # Don't add to converted_dict - these kits won't have line assignments
179
+
180
+ return converted_dict
181
+
182
+ KIT_LINE_MATCH_DICT = get_kit_line_match()
183
+
184
+
185
+ def get_line_cnt_per_type():
186
+ try:
187
+ # Try to get from streamlit session state (from config page)
188
+ import streamlit as st
189
+ if hasattr(st, 'session_state') and 'line_counts' in st.session_state:
190
+ print(f"Using line counts from config page: {st.session_state.line_counts}")
191
+ return st.session_state.line_counts
192
+ except Exception as e:
193
+ print(f"Could not get line counts from streamlit session: {e}")
194
+
195
+ print(f"Loading default line count values from data files")
196
+ line_df = extract.read_packaging_line_data()
197
+ line_cnt_per_type = line_df.set_index("id")["line_count"].to_dict()
198
+ print("line cnt per type", line_cnt_per_type)
199
+ return line_cnt_per_type
200
+
201
+ # DO NOT load at import time - always call get_line_cnt_per_type() dynamically
202
+ # LINE_CNT_PER_TYPE = get_line_cnt_per_type() # REMOVED - was causing stale data!
203
+
204
+ #where?
205
+ def get_demand_dictionary(force_reload=False):
206
+ """
207
+ Get filtered demand dictionary.
208
+ IMPORTANT: This dynamically loads data to reflect current Streamlit configs/dates.
209
+ """
210
+ try:
211
+ # Always get fresh filtered demand to reflect current configs
212
+ from src.demand_filtering import DemandFilter
213
+ filter_instance = DemandFilter()
214
+
215
+ # Force reload data to pick up new dates/configs
216
+ filter_instance.load_data(force_reload=True)
217
+
218
+ demand_dictionary = filter_instance.get_filtered_demand_dictionary()
219
+ print(f"📈 FRESH FILTERED DEMAND: {len(demand_dictionary)} products with total demand {sum(demand_dictionary.values())}")
220
+ print(f"🔄 LOADED DYNAMICALLY: Reflects current Streamlit configs")
221
+ return demand_dictionary
222
+ except Exception as e:
223
+ print(f"Error loading dynamic demand dictionary: {e}")
224
+ raise Exception("Demand dictionary not found with error:"+str(e))
225
+
226
+ # DO NOT load at import time - always call get_demand_dictionary() dynamically
227
+ # DEMAND_DICTIONARY = get_demand_dictionary() # REMOVED - was causing stale data!
228
+
229
+ #delete as already using default cost rates
230
+ def get_cost_list_per_emp_shift():
231
+ try:
232
+ # Try to get from streamlit session state (from config page)
233
+ import streamlit as st
234
+ if hasattr(st, 'session_state') and 'cost_list_per_emp_shift' in st.session_state:
235
+ print(f"Using cost list from config page: {st.session_state.cost_list_per_emp_shift}")
236
+ return st.session_state.cost_list_per_emp_shift
237
+ except Exception as e:
238
+ print(f"Could not get cost list from streamlit session: {e}")
239
+
240
+ print(f"Loading default cost values")
241
+ # Default hourly rates - Important: multiple employment types with different costs
242
+ return DefaultConfig.DEFAULT_COST_RATES
243
+
244
+ def shift_code_to_name():
245
+ return ShiftType.get_all_names()
246
+
247
+ def line_code_to_name():
248
+ """Convert line type IDs to readable names"""
249
+ return LineType.get_all_names()
250
+
251
+ # DO NOT load at import time - always call get_cost_list_per_emp_shift() dynamically
252
+ # COST_LIST_PER_EMP_SHIFT = get_cost_list_per_emp_shift() # REMOVED - was causing stale data!
253
+
254
+
255
+
256
+ # COST_LIST_PER_EMP_SHIFT = { # WH_Workforce_Hourly_Pay_Scale
257
+ # "Fixed": {1: 0, 2: 22, 3: 18},
258
+ # "Humanizer": {1: 10, 2: 10, 3: 10},
259
+ # }
260
+
261
+
262
+
263
+
264
+
265
+
266
+ #where to put?
267
+ def get_team_requirements(product_list=None):
268
+ """
269
+ Extract team requirements from Kits Calculation CSV.
270
+ Returns dictionary with employee type as key and product requirements as nested dict.
271
+ """
272
+ if product_list is None:
273
+ product_list = get_product_list() # Get fresh product list
274
+
275
+
276
+ kits_df = extract.read_personnel_requirement_data()
277
+
278
+ team_req_dict = {
279
+ "UNICEF Fixed term": {},
280
+ "Humanizer": {}
281
+ }
282
+
283
+ # Process each product in the product list
284
+ for product in product_list:
285
+ print("product",product)
286
+ print(f"Processing team requirements for product: {product}")
287
+ product_data = kits_df[kits_df['Kit'] == product]
288
+ print("product_data",product_data)
289
+ if not product_data.empty:
290
+ # Extract Humanizer and UNICEF staff requirements
291
+ humanizer_req = product_data["Humanizer"].iloc[0]
292
+ unicef_req = product_data["UNICEF staff"].iloc[0]
293
+
294
+ # Convert to int (data is already cleaned in extract function)
295
+ team_req_dict["Humanizer"][product] = int(humanizer_req)
296
+ team_req_dict["UNICEF Fixed term"][product] = int(unicef_req)
297
+ else:
298
+ print(f"Warning: Product {product} not found in Kits Calculation data, setting requirements to 0")
299
+
300
+
301
+ return team_req_dict
302
+
303
+
304
+
305
+ def get_max_employee_per_type_on_day():
306
+ try:
307
+ # Try to get from streamlit session state (from config page)
308
+ import streamlit as st
309
+ if hasattr(st, 'session_state') and 'max_employee_per_type_on_day' in st.session_state:
310
+ print(f"Using max employee counts from config page: {st.session_state.max_employee_per_type_on_day}")
311
+ return st.session_state.max_employee_per_type_on_day
312
+ except Exception as e:
313
+ print(f"Could not get max employee counts from streamlit session: {e}")
314
+
315
+ print(f"Loading default max employee values")
316
+ # Get date span dynamically if not available
317
+ if DATE_SPAN is None:
318
+ date_span, _, _ = get_date_span()
319
+ else:
320
+ date_span = DATE_SPAN
321
+
322
+ max_employee_per_type_on_day = {
323
+ "UNICEF Fixed term": {
324
+ t: 8 for t in date_span
325
+ },
326
+ "Humanizer": {
327
+ t: 10 for t in date_span
328
+ }
329
+ }
330
+ return max_employee_per_type_on_day
331
+
332
+
333
+ # Keep the constant for backward compatibility, but use function instead
334
+ MAX_HOUR_PER_PERSON_PER_DAY = 14 # legal standard
335
+ def get_max_hour_per_shift_per_person():
336
+ """Get max hours per shift per person from session state or default"""
337
+ try:
338
+ import streamlit as st
339
+ if hasattr(st, 'session_state'):
340
+ # Build from individual session state values
341
+ max_hours = {
342
+ ShiftType.REGULAR: st.session_state.get('max_hours_shift_1', DefaultConfig.MAX_HOUR_PER_SHIFT_PER_PERSON[ShiftType.REGULAR]),
343
+ ShiftType.EVENING: st.session_state.get('max_hours_shift_2', DefaultConfig.MAX_HOUR_PER_SHIFT_PER_PERSON[ShiftType.EVENING]),
344
+ ShiftType.OVERTIME: st.session_state.get('max_hours_shift_3', DefaultConfig.MAX_HOUR_PER_SHIFT_PER_PERSON[ShiftType.OVERTIME])
345
+ }
346
+ return max_hours
347
+ except Exception as e:
348
+ print(f"Could not get max hours per shift from session: {e}")
349
+
350
+ # Fallback to default
351
+ return DefaultConfig.MAX_HOUR_PER_SHIFT_PER_PERSON
352
+
353
+
354
+
355
+ # Keep these complex getters that access DefaultConfig or have complex logic:
356
+ def get_evening_shift_demand_threshold():
357
+ """Get evening shift demand threshold from session state or default"""
358
+ try:
359
+ import streamlit as st
360
+ if hasattr(st, 'session_state'):
361
+ return st.session_state.get('evening_shift_threshold', DefaultConfig.EVENING_SHIFT_DEMAND_THRESHOLD)
362
+ except Exception as e:
363
+ print(f"Could not get evening shift threshold from session: {e}")
364
+
365
+ # Fallback to default
366
+ return DefaultConfig.EVENING_SHIFT_DEMAND_THRESHOLD
367
+
368
+
369
+ # ---- Kit Hierarchy for Production Ordering ----
370
+ def get_kit_hierarchy_data():
371
+ kit_levels, dependencies, priority_order = extract.get_production_order_data()
372
+
373
+ return kit_levels, dependencies, priority_order
374
+
375
+ KIT_LEVELS, KIT_DEPENDENCIES, PRODUCTION_PRIORITY_ORDER = get_kit_hierarchy_data()
376
+ print(f"Kit Hierarchy loaded: {len(KIT_LEVELS)} kits, Priority order: {len(PRODUCTION_PRIORITY_ORDER)} items")
377
+
378
+ def get_kit_levels():
379
+ """Get kit levels lazily - returns {kit_id: level} where 0=prepack, 1=subkit, 2=master"""
380
+ kit_levels, _, _ = get_kit_hierarchy_data()
381
+ return kit_levels
382
+
383
+ def get_kit_dependencies():
384
+ """Get kit dependencies lazily - returns {kit_id: [dependency_list]}"""
385
+ _, dependencies, _ = get_kit_hierarchy_data()
386
+ return dependencies
387
+
388
+ def get_max_parallel_workers():
389
+ """Get max parallel workers from session state or default"""
390
+ try:
391
+ import streamlit as st
392
+ if hasattr(st, 'session_state'):
393
+ # Build from individual session state values
394
+ max_parallel_workers = {
395
+ LineType.LONG_LINE: st.session_state.get('max_parallel_workers_long_line', DefaultConfig.MAX_PARALLEL_WORKERS_LONG_LINE),
396
+ LineType.MINI_LOAD: st.session_state.get('max_parallel_workers_mini_load', DefaultConfig.MAX_PARALLEL_WORKERS_MINI_LOAD)
397
+ }
398
+ return max_parallel_workers
399
+ except Exception as e:
400
+ print(f"Could not get max parallel workers from session: {e}")
401
+
402
+ # Fallback to default
403
+ return {
404
+ LineType.LONG_LINE: DefaultConfig.MAX_PARALLEL_WORKERS_LONG_LINE,
405
+ LineType.MINI_LOAD: DefaultConfig.MAX_PARALLEL_WORKERS_MINI_LOAD
406
+ }
407
+
408
+
409
+
410
+ def get_fixed_min_unicef_per_day():
411
+ """
412
+ Get fixed minimum UNICEF employees per day - try from streamlit session state first, then default
413
+ This ensures a minimum number of UNICEF fixed-term staff are present every working day
414
+ """
415
+ try:
416
+ import streamlit as st
417
+ if hasattr(st, 'session_state') and 'fixed_min_unicef_per_day' in st.session_state:
418
+ print(f"Using fixed minimum UNICEF per day from config page: {st.session_state.fixed_min_unicef_per_day}")
419
+ return st.session_state.fixed_min_unicef_per_day
420
+ except ImportError:
421
+ pass
422
+
423
+ # Fallback to default configuration
424
+ return DefaultConfig.FIXED_MIN_UNICEF_PER_DAY
425
+
426
+
427
+ def get_payment_mode_config():
428
+ """
429
+ Get payment mode configuration - try from streamlit session state first, then default values
430
+ Payment modes:
431
+ - "bulk": If employee works any hours in shift, pay for full shift hours
432
+ - "partial": Pay only for actual hours worked
433
+ """
434
+ try:
435
+ # Try to get from streamlit session state (from Dataset Metadata page)
436
+ import streamlit as st
437
+ if hasattr(st, 'session_state') and 'payment_mode_config' in st.session_state:
438
+ print(f"Using payment mode config from streamlit session: {st.session_state.payment_mode_config}")
439
+ return st.session_state.payment_mode_config
440
+ except Exception as e:
441
+ print(f"Could not get payment mode config from streamlit session: {e}")
442
+
443
+ # Default payment mode configuration
444
+ print(f"Loading default payment mode configuration")
445
+ payment_mode_config = DefaultConfig.PAYMENT_MODE_CONFIG
446
+
447
+ return payment_mode_config
448
+
449
+
450
+ print("✅ Module-level configuration functions defined (variables initialized dynamically)")
src/config/paths.yaml ADDED
@@ -0,0 +1,19 @@
1
+ # Data Paths Configuration
2
+ # All paths are relative to the project root directory
3
+
4
+ data:
5
+ # CSV data files
6
+ csv:
7
+ demand: "data/real_data_excel/converted_csv/COOIS_Planned_and_Released.csv"
8
+ kit_composition: "data/real_data_excel/converted_csv/Kit_Composition_and_relation_cleaned_with_line_type.csv"
9
+ workforce_pay_scale: "data/real_data_excel/converted_csv/WH_Workforce_Hourly_Pay_Scale_processed.csv"
10
+ work_shift: "data/real_data_excel/converted_csv/work_shift.csv"
11
+ work_center_capacity: "data/real_data_excel/converted_csv/Work_Centre_Capacity.csv"
12
+ work_center_capacity_processed: "data/real_data_excel/converted_csv/Work_Centre_Capacity_processed.csv"
13
+ material_master: "data/real_data_excel/converted_csv/Material_Master_WMS.csv"
14
+ kits_calculation: "data/real_data_excel/converted_csv/Kits__Calculation.csv"
15
+
16
+ # Hierarchy data
17
+ hierarchy:
18
+ kit_hierarchy: "data/hierarchy_exports/kit_hierarchy.json"
19
+
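paths.yaml centralizes the CSV and hierarchy file locations relative to the project root. The repository's actual loader is not shown in this commit, so the following is only a sketch of reading the file with PyYAML and resolving one entry, assuming PyYAML is installed and the working directory is the project root.

# Illustrative loader for src/config/paths.yaml (the extract module's real loader is not shown here).
from pathlib import Path
import yaml

with open("src/config/paths.yaml", "r") as f:
    paths = yaml.safe_load(f)

demand_csv = Path(paths["data"]["csv"]["demand"])
print(demand_csv)  # data/real_data_excel/converted_csv/COOIS_Planned_and_Released.csv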
src/demand_filtering.py ADDED
@@ -0,0 +1,413 @@
1
+ """
2
+ Demand Data Filtering Module
3
+
4
+ This module filters demand data to include only products that are ready for optimization.
5
+ Excludes products that:
6
+ 1. Have no line type assignments (non-standalone masters)
7
+ 2. Have zero staffing requirements (both Humanizer and UNICEF staff = 0)
8
+
9
+ The filtered data is used by the optimization system.
10
+ """
11
+
12
+ import pandas as pd
13
+ from typing import Dict, List, Tuple
14
+ from src.preprocess import extract
15
+
16
+
17
+ class DemandFilter:
18
+ """
19
+ Filters demand data to include only products ready for optimization
20
+ """
21
+
22
+ def __init__(self):
23
+ self.demand_data = None
24
+ self.kit_levels = None
25
+ self.kit_dependencies = None
26
+ self.line_assignments = None
27
+ self.team_requirements = None
28
+ self.speed_data = None
29
+
30
+ def load_data(self, force_reload=False):
31
+ """Load all necessary data for filtering"""
32
+ try:
33
+ # Skip loading if data already exists and not forcing reload
34
+ if not force_reload and self.demand_data is not None:
35
+ print("📊 Using cached filter data (set force_reload=True to refresh)")
36
+ return True
37
+
38
+ print("🔄 Loading fresh filtering data...")
39
+ # Get start date for demand data from optimization config
40
+ from src.config.optimization_config import get_date_span
41
+ date_span, start_date, end_date = get_date_span()
42
+ print(f"🗓️ DEMAND FILTERING DATE: Using {start_date.date() if start_date else 'None'} (same as optimization)")
43
+
44
+ # Load demand data directly from extract
45
+ demand_df = extract.read_orders_data(start_date=start_date)
46
+ self.demand_data = demand_df.groupby('Material Number')["Order quantity (GMEIN)"].sum().to_dict()
47
+
48
+ # Load kit hierarchy data
49
+ kit_levels, dependencies, _ = extract.get_production_order_data()
50
+ self.kit_levels = kit_levels
51
+ self.kit_dependencies = dependencies
52
+
53
+ # Load line assignments from kit line match data
54
+ kit_line_match = extract.read_kit_line_match_data()
55
+ kit_line_match_dict = kit_line_match.set_index("kit_name")["line_type"].to_dict()
56
+
57
+ # Convert string line names to numeric IDs
58
+ from src.config.constants import LineType
59
+ line_name_to_id = {
60
+ "long line": LineType.LONG_LINE,
61
+ "mini load": LineType.MINI_LOAD,
62
+ "miniload": LineType.MINI_LOAD,
63
+ "Long_line": LineType.LONG_LINE,
64
+ "Mini_load": LineType.MINI_LOAD,
65
+ }
66
+
67
+ self.line_assignments = {}
68
+ for kit, line_name in kit_line_match_dict.items():
69
+ if isinstance(line_name, str) and line_name.strip():
70
+ line_id = line_name_to_id.get(line_name.strip())
71
+ if line_id is not None:
72
+ self.line_assignments[kit] = line_id
73
+ elif isinstance(line_name, (int, float)) and not pd.isna(line_name):
74
+ self.line_assignments[kit] = int(line_name)
75
+
76
+ # Load team requirements from Kits Calculation data
77
+ kits_df = extract.read_personnel_requirement_data()
78
+ self.team_requirements = {
79
+ 'UNICEF Fixed term': kits_df.set_index('Kit')['UNICEF staff'].to_dict(),
80
+ 'Humanizer': kits_df.set_index('Kit')['Humanizer'].to_dict()
81
+ }
82
+
83
+ # Load production speed data
84
+ self.speed_data = extract.read_package_speed_data()
85
+
86
+ print(f"✅ Filtering data loaded: {len(self.demand_data)} products with demand, {len(self.speed_data)} with speed data")
87
+ return True
88
+
89
+ except Exception as e:
90
+ print(f"Error loading data for filtering: {str(e)}")
91
+ return False
92
+
93
+
94
+ def standalone_master_filter(self, product_id: str) -> Tuple[str, bool]:
95
+ """
96
+ Classify product type and check if it's a standalone master.
97
+
98
+ Returns:
99
+ Tuple[str, bool]: (product_type, is_standalone_master)
100
+ """
101
+ if product_id in self.kit_levels:
102
+ level = self.kit_levels[product_id]
103
+
104
+ if level == 0:
105
+ return "prepack", False
106
+ elif level == 1:
107
+ return "subkit", False
108
+ elif level == 2:
109
+ # Check if this master is standalone (no subkits/prepacks)
110
+ dependencies = self.kit_dependencies.get(product_id, [])
111
+ is_standalone = len(dependencies) == 0
112
+ return "master", is_standalone
113
+ else:
114
+ return "unknown", False
115
+ else:
116
+ return "unclassified", False
117
+
118
+ def _get_line_type_capacity(self, line_type: int) -> int:
119
+ """
120
+ Calculate the total capacity in hours for a specific line type.
121
+
122
+ Args:
123
+ line_type: The line type ID (e.g., 6 for Long Line, 7 for Mini Load)
124
+
125
+ Returns:
126
+ int: Total capacity in hours for this line type
127
+ """
128
+ from src.config.optimization_config import get_line_cnt_per_type, get_max_hour_per_shift_per_person, get_active_shift_list, get_date_span
129
+
130
+ line_cnt_per_type = get_line_cnt_per_type()
131
+ max_hours_per_shift_dict = get_max_hour_per_shift_per_person()
132
+ active_shifts = get_active_shift_list()
133
+ date_span, _, _ = get_date_span() # Get date span dynamically
134
+
135
+ # Get line count for this specific line type
136
+ line_count = line_cnt_per_type.get(line_type, 0)
137
+
138
+ # Calculate total hours per day (sum of all active shift hours)
139
+ total_hours_per_day = sum(max_hours_per_shift_dict.get(shift, 0) for shift in active_shifts)
140
+
141
+ # Calculate available capacity hours
142
+ # Available hours = line_count × total_hours_per_day × days_in_period
143
+ available_hours = line_count * total_hours_per_day * len(date_span)
144
+
145
+ return available_hours
146
+
147
+ def get_maximum_packaging_capacity(self) -> int:
148
+ """
149
+ Get the maximum packaging capacity across all line types.
150
+
151
+ Returns:
152
+ int: Maximum total capacity in hours across all lines
153
+ """
154
+ from src.config.optimization_config import get_line_cnt_per_type
155
+
156
+ line_cnt_per_type = get_line_cnt_per_type()
157
+ total_capacity = 0
158
+
159
+ for line_type, line_count in line_cnt_per_type.items():
160
+ if line_count > 0: # Only count active lines
161
+ line_capacity = self._get_line_type_capacity(line_type)
162
+ total_capacity += line_capacity
163
+
164
+ return total_capacity
165
+
166
+ def too_high_demand_filter(self, product_id: str) -> bool:
167
+ """
168
+ Check if the demand for a product is too high.
169
+
170
+ A product has "too high demand" when the total processing hours needed
171
+ exceeds the available capacity hours for the product's assigned line type.
172
+
173
+ NOTE: This method assumes all prerequisite data is available (demand > 0,
174
+ line assignment exists, speed data exists). The main filter function
175
+ should handle these edge cases.
176
+
177
+ Calculation:
178
+ - Processing hours needed = demand_quantity / production_speed_per_hour
179
+ - Available hours = line_count × hours_per_shift × shifts_per_day × days_in_period
180
+
181
+ Args:
182
+ product_id: The product ID to check
183
+
184
+ Returns:
185
+ bool: True if demand is too high (should be excluded), False otherwise
186
+ """
187
+ # Get demand for this product (assumes demand > 0, checked by main filter)
188
+ demand = self.demand_data.get(product_id, 0)
189
+ if demand <= 0:
190
+ return False
191
+ # Get line assignment for this product (assumes exists, checked by main filter)
192
+ if self.line_assignments is None or product_id not in self.line_assignments:
193
+ return False
194
+ line_type = self.line_assignments.get(product_id)
195
+
196
+ # Get production speed data (assumes exists, checked by main filter)
197
+ if self.speed_data is None or product_id not in self.speed_data:
198
+ return False
199
+ production_speed_per_hour = self.speed_data[product_id]
200
+
201
+ # Calculate processing hours needed
202
+ processing_hours_needed = demand / production_speed_per_hour
203
+
204
+ # Get available capacity for this specific line type
205
+ available_hours = self._get_line_type_capacity(line_type)
206
+
207
+ # Check if processing hours needed exceeds available capacity
208
+ is_too_high = processing_hours_needed > available_hours
209
+
210
+ if is_too_high:
211
+ print(f"⚠️ HIGH DEMAND WARNING: {product_id} needs {processing_hours_needed:.1f}h but only {available_hours:.1f}h available (line_type={line_type}, demand={demand}, speed={production_speed_per_hour:.1f}/h)")
212
+
213
+ return is_too_high
214
+
215
+ def is_product_ready_for_optimization(self, product_id: str) -> Tuple[bool, List[str]]:
216
+ """
217
+ Check if a single product is ready for optimization.
218
+ 1) Should have demand higher than 0
219
+ 2) Should be right type - standalone master, subkit, prepack
220
+ 3) Should have line assignment
221
+ 4) Should have staffing requirements
222
+ 5) Should have production speed data
223
+
224
+ Returns:
225
+ Tuple[bool, List[str]]: (is_ready, exclusion_reasons)
226
+ """
227
+ exclusion_reasons = []
228
+
229
+ # Check if product has positive demand
230
+ demand = self.demand_data.get(product_id, 0)
231
+ if demand <= 0:
232
+ exclusion_reasons.append("No demand or zero demand")
233
+
234
+ # Classify product type
235
+ product_type, is_standalone_master = self.standalone_master_filter(product_id)
236
+
237
+ # Check line assignment logic
238
+ has_line_assignment = product_id in self.line_assignments
239
+
240
+ # For masters: standalone should have line assignment, non-standalone should NOT
241
+
242
+ if product_type == "master":
243
+ if is_standalone_master:
244
+ if not has_line_assignment:
245
+ exclusion_reasons.append("Standalone master missing line assignment")
246
+ elif self.line_assignments.get(product_id) != 6: # 6 = LONG_LINE
247
+ exclusion_reasons.append("Standalone master should have long line assignment")
248
+ else:
249
+ # Non-standalone masters should NOT have line assignment (excluded from production)
250
+ exclusion_reasons.append("Non-standalone master (excluded from production)")
251
+ else:
252
+ # For subkits and prepacks, check normal line assignment
253
+ if not has_line_assignment:
254
+ exclusion_reasons.append("No line assignment")
255
+
256
+ # Check staffing requirements
257
+ unicef_staff = self.team_requirements.get('UNICEF Fixed term', {}).get(product_id, 0)
258
+ humanizer_staff = self.team_requirements.get('Humanizer', {}).get(product_id, 0)
259
+ total_staff = unicef_staff + humanizer_staff
260
+
261
+ if total_staff == 0:
262
+ exclusion_reasons.append("Zero staffing requirements")
263
+
264
+ # Check production speed data
265
+ if self.speed_data is None or product_id not in self.speed_data:
266
+ exclusion_reasons.append("Missing production speed data")
267
+
268
+ # Check if demand is too high (only if we have all required data)
269
+ if self.too_high_demand_filter(product_id):
270
+ exclusion_reasons.append("Demand exceeds available production capacity")
271
+
272
+
273
+
274
+
275
+ is_ready = len(exclusion_reasons) == 0
276
+ return is_ready, exclusion_reasons
277
+
278
+ def filter_products(self) -> Tuple[List[str], Dict[str, int], List[str], Dict[str, int]]:
279
+ """
280
+ Filter products into included and excluded lists based on optimization readiness.
281
+ Uses is_product_ready_for_optimization() to check all criteria.
282
+
283
+ Returns:
284
+ Tuple containing:
285
+ - included_products: List of product IDs ready for optimization
286
+ - included_demand: Dict of {product_id: demand} for included products
287
+ - excluded_products: List of product IDs excluded from optimization
288
+ - excluded_demand: Dict of {product_id: demand} for excluded products
289
+ """
290
+ if not self.load_data():
291
+ raise Exception("Failed to load data for filtering")
292
+
293
+ included_products = []
294
+ included_demand = {}
295
+ excluded_products = []
296
+ excluded_demand = {}
297
+ excluded_details = {}
298
+
299
+ for product_id, demand in self.demand_data.items():
300
+ is_ready, exclusion_reasons = self.is_product_ready_for_optimization(product_id)
301
+
302
+ if is_ready:
303
+ included_products.append(product_id)
304
+ included_demand[product_id] = demand
305
+ else:
306
+ excluded_products.append(product_id)
307
+ excluded_demand[product_id] = demand
308
+ excluded_details[product_id] = exclusion_reasons
309
+
310
+ # Sort products for consistent output
311
+ included_products.sort()
312
+ excluded_products.sort()
313
+ # Print data quality warnings for included products
314
+ included_without_hierarchy = sum(1 for pid in included_products if self.standalone_master_filter(pid)[0] == "unclassified")
315
+ if included_without_hierarchy > 0:
316
+ print(f"\n⚠️ DATA QUALITY WARNING: {included_without_hierarchy} included products missing hierarchy data")
317
+
318
+ return included_products, included_demand, excluded_products, excluded_demand
319
+
320
+ def get_filtered_product_list(self) -> List[str]:
321
+ """Get list of products ready for optimization"""
322
+ included_products, _, _, _ = self.filter_products()
323
+ return included_products
324
+
325
+ def get_filtered_demand_dictionary(self) -> Dict[str, int]:
326
+ """Get demand dictionary for products ready for optimization"""
327
+ _, included_demand, _, _ = self.filter_products()
328
+ return included_demand
329
+
330
+ def get_complete_product_analysis(self) -> Dict:
331
+ """Get complete analysis of all products for visualization"""
332
+ included_products, included_demand, excluded_products, excluded_demand = self.filter_products()
333
+
334
+ all_products = {**included_demand, **excluded_demand}
335
+ product_details = {}
336
+
337
+ # Load speed data for additional validation
338
+ speed_data = None
339
+ try:
340
+ from src.config import optimization_config
341
+ from src.preprocess import extract
342
+ speed_data = extract.read_package_speed_data()
343
+ except Exception as e:
344
+ print(f"Warning: Could not load speed data for analysis: {e}")
345
+
346
+ for product_id, demand in all_products.items():
347
+ product_type, is_standalone_master = self.standalone_master_filter(product_id)
348
+ is_ready, exclusion_reasons = self.is_product_ready_for_optimization(product_id)
349
+
350
+ # Get staffing info
351
+ unicef_staff = self.team_requirements.get('UNICEF Fixed term', {}).get(product_id, 0)
352
+ humanizer_staff = self.team_requirements.get('Humanizer', {}).get(product_id, 0)
353
+
354
+ # Get line assignment
355
+ line_assignment = self.line_assignments.get(product_id)
356
+
357
+ # Get production speed info
358
+ has_speed_data = speed_data is not None and product_id in speed_data
359
+
360
+ # too high demand
361
+ has_too_high_demand = self.too_high_demand_filter(product_id)
362
+
363
+ product_details[product_id] = {
364
+ 'demand': demand,
365
+ 'product_type': product_type,
366
+ 'is_standalone_master': is_standalone_master,
367
+ 'is_included_in_optimization': is_ready,
368
+ 'exclusion_reasons': exclusion_reasons,
369
+ 'unicef_staff': unicef_staff,
370
+ 'humanizer_staff': humanizer_staff,
371
+ 'total_staff': unicef_staff + humanizer_staff,
372
+ 'line_assignment': line_assignment,
373
+ 'has_line_assignment': line_assignment is not None,
374
+ 'has_staffing': (unicef_staff + humanizer_staff) > 0,
375
+ 'has_hierarchy': product_type != "unclassified",
376
+ 'has_speed_data': has_speed_data,
377
+ 'has_too_high_demand': has_too_high_demand
378
+ }
379
+
380
+ # Calculate data quality statistics for included products
381
+ included_without_speed = sum(1 for pid in included_products if not product_details[pid]['has_speed_data'])
382
+ included_without_hierarchy = sum(1 for pid in included_products if not product_details[pid]['has_hierarchy'])
383
+
384
+ # Count products excluded due to too high demand
385
+ excluded_with_too_high_demand = sum(1 for pid in excluded_products if product_details[pid]['has_too_high_demand'])
386
+ return {
387
+ 'included_count': len(included_products),
388
+ 'included_demand': sum(included_demand.values()),
389
+ 'excluded_count': len(excluded_products),
390
+ 'excluded_demand': sum(excluded_demand.values()),
391
+ 'total_products': len(all_products),
392
+ 'total_demand': sum(all_products.values()),
393
+ 'product_details': product_details,
394
+ 'standalone_masters_count': sum(1 for p in product_details.values() if p['is_standalone_master']),
395
+ 'included_products': included_products,
396
+ 'excluded_products': excluded_products,
397
+ # Data quality metrics for included products
398
+ 'included_missing_speed_count': included_without_speed,
399
+ 'included_missing_hierarchy_count': included_without_hierarchy,
400
+ 'excluded_with_too_high_demand_count': excluded_with_too_high_demand
401
+ }
402
+
403
+
404
+ # Test script when run directly
405
+
406
+ if __name__ == "__main__":
407
+ # Test the filtering
408
+ filter_instance = DemandFilter()
409
+ included_products, included_demand, excluded_products, excluded_demand = filter_instance.filter_products()
410
+
411
+ print(f"\n=== FILTERING TEST RESULTS ===")
412
+ print(f"Included products: {included_products[:5]}..." if len(included_products) > 5 else f"Included products: {included_products}")
413
+ print(f"Excluded products: {excluded_products[:5]}..." if len(excluded_products) > 5 else f"Excluded products: {excluded_products}")
src/demand_validation_viz.py ADDED
@@ -0,0 +1,278 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Demand Data Validation Visualization Module
4
+
5
+ Provides Streamlit visualization for demand data validation.
6
+ Shows which products are included/excluded from optimization and why.
7
+ """
8
+
9
+ import pandas as pd
10
+ import streamlit as st
11
+ from typing import Dict
12
+ from src.config.constants import LineType
13
+ from src.demand_filtering import DemandFilter
14
+
15
+
16
+ # Simple mapping for product level names
17
+ LEVEL_NAMES = {
18
+ 'prepack': 'prepack',
19
+ 'subkit': 'subkit',
20
+ 'master': {
21
+ 'standalone': 'standalone_master',
22
+ 'with_hierarchy': 'master_with_hierarchy'
23
+ },
24
+ 'unclassified': 'no_hierarchy_data'
25
+ }
26
+
27
+
28
+ class DemandValidationViz:
29
+ """
30
+ Simple visualization wrapper for demand filtering results.
31
+ All filtering logic is in DemandFilter - this just displays the results.
32
+ """
33
+
34
+ def __init__(self):
35
+ self.filter_instance = DemandFilter()
36
+ self.speed_data = None
37
+
38
+ def load_data(self):
39
+ """Load all data needed for visualization"""
40
+ try:
41
+ from src.config import optimization_config
42
+ from src.preprocess import extract
43
+ self.speed_data = extract.read_package_speed_data()
44
+ return self.filter_instance.load_data()
45
+ except Exception as e:
46
+ error_msg = f"Error loading data: {str(e)}"
47
+ print(error_msg)
48
+ if st:
49
+ st.error(error_msg)
50
+ return False
51
+
52
+ def validate_all_products(self) -> pd.DataFrame:
53
+ """
54
+ Create DataFrame with validation results for all products.
55
+ Main visualization method - converts filtering results to displayable format.
56
+ """
57
+ # Get analysis from filtering module
58
+ analysis = self.filter_instance.get_complete_product_analysis()
59
+ product_details = analysis['product_details']
60
+
61
+ results = []
62
+ for product_id, details in product_details.items():
63
+ # Calculate production hours if speed data available
64
+ speed = self.speed_data.get(product_id) if self.speed_data else None
65
+ production_hours = (details['demand'] / speed) if speed and speed > 0 else None
66
+
67
+ # Get line type name
68
+ line_type_id = details['line_assignment']
69
+ line_name = LineType.get_name(line_type_id) if line_type_id is not None else "no_assignment"
70
+
71
+ # Get level name (simplified)
72
+ ptype = details['product_type']
73
+ if ptype == 'unclassified':
74
+ level_name = LEVEL_NAMES['unclassified']
75
+ elif ptype == 'master':
76
+ level_name = LEVEL_NAMES['master']['standalone' if details['is_standalone_master'] else 'with_hierarchy']
77
+ else:
78
+ level_name = LEVEL_NAMES.get(ptype, f"level_{ptype}")
79
+
80
+ # Build validation status message
81
+ if not details['is_included_in_optimization']:
82
+ validation_status = f"🚫 Excluded: {', '.join(details['exclusion_reasons'])}"
83
+ else:
84
+ issues = []
85
+ if speed is None:
86
+ issues.append("missing_speed_data (will use default)")
87
+ if not details['has_hierarchy']:
88
+ issues.append("no_hierarchy_data")
89
+ validation_status = f"⚠️ Data Issues: {', '.join(issues)}" if issues else "✅ Ready for optimization"
90
+
91
+
92
+
93
+ if details['is_included_in_optimization'] and details['has_too_high_demand']:
94
+ issues.append("too_high_demand")
95
+ validation_status = f"⚠️ Data Issues: {', '.join(issues)}" if issues else "✅ Ready for optimization"
96
+ results.append({
97
+ 'Product ID': product_id,
98
+ 'Demand': details['demand'],
99
+ 'Product Type': ptype.title(),
100
+ 'Level': level_name,
101
+ 'Is Standalone Master': "Yes" if details['is_standalone_master'] else "No",
102
+ 'Line Type ID': line_type_id if line_type_id else "N/A",
103
+ 'Line Type': line_name,
104
+ 'UNICEF Staff': details['unicef_staff'],
105
+ 'Humanizer Staff': details['humanizer_staff'],
106
+ 'Total Staff': details['total_staff'],
107
+ 'Production Speed (units/hour)': f"{speed:.1f}" if speed else "N/A",
108
+ 'Production Hours Needed': f"{production_hours:.1f}" if production_hours else "N/A",
109
+ 'Has Line Assignment': "✅" if details['has_line_assignment'] else "❌",
110
+ 'Has Staffing Data': "✅" if details['has_staffing'] else "❌",
111
+ 'Has Speed Data': "✅" if speed is not None else "❌ (will use default)",
112
+ 'Has Hierarchy Data': "✅" if details['has_hierarchy'] else "❌",
113
+ 'Excluded from Optimization': not details['is_included_in_optimization'],
114
+ 'Exclusion Reasons': ', '.join(details['exclusion_reasons']) if details['exclusion_reasons'] else '',
115
+ 'Data Quality Issues': ', '.join(issues) if details['is_included_in_optimization'] and 'issues' in locals() and issues else '',
116
+ 'Has Too High Demand': "✅" if details['has_too_high_demand'] else "❌",
117
+ 'Validation Status': validation_status
118
+ })
119
+
120
+ df = pd.DataFrame(results)
121
+ df = df.sort_values(['Excluded from Optimization', 'Demand'], ascending=[False, False])
122
+ return df
123
+
124
+ def get_summary_statistics(self, df: pd.DataFrame) -> Dict:
125
+ """Calculate summary statistics from validation results"""
126
+ analysis = self.filter_instance.get_complete_product_analysis()
127
+ included_df = df[df['Excluded from Optimization'] == False]
128
+
129
+ return {
130
+ 'total_products': analysis['total_products'],
131
+ 'total_demand': analysis['total_demand'],
132
+ 'included_products': analysis['included_count'],
133
+ 'excluded_products': analysis['excluded_count'],
134
+ 'included_demand': analysis['included_demand'],
135
+ 'excluded_demand': analysis['excluded_demand'],
136
+ 'type_counts': df['Product Type'].value_counts().to_dict(),
137
+ 'no_line_assignment': len(included_df[included_df['Has Line Assignment'] == "❌"]),
138
+ 'no_staffing': len(included_df[included_df['Has Staffing Data'] == "❌"]),
139
+ 'no_speed': len(included_df[included_df['Has Speed Data'].str.contains("❌")]),
140
+ 'no_hierarchy': len(included_df[included_df['Has Hierarchy Data'] == "❌"]),
141
+ 'standalone_masters': analysis['standalone_masters_count'],
142
+ 'total_unicef_needed': sum(p['unicef_staff'] for p in analysis['product_details'].values()),
143
+ 'total_humanizer_needed': sum(p['humanizer_staff'] for p in analysis['product_details'].values()),
144
+ 'excluded_with_too_high_demand': analysis['excluded_with_too_high_demand_count']
145
+ }
146
+
147
+
148
+ def display_demand_validation():
149
+ """
150
+ Display demand validation analysis in Streamlit.
151
+ Main entry point for the validation page.
152
+ """
153
+ st.header("📋 Demand Data Validation")
154
+ st.markdown("Analysis showing which products are included/excluded from optimization and data quality status.")
155
+
156
+ # Load and analyze data
157
+ validator = DemandValidationViz()
158
+ with st.spinner("Loading and analyzing data..."):
159
+ if not validator.load_data():
160
+ st.error("Failed to load data for validation.")
161
+ return
162
+ validation_df = validator.validate_all_products()
163
+ stats = validator.get_summary_statistics(validation_df)
164
+
165
+ # ===== SUMMARY METRICS =====
166
+ st.subheader("📊 Summary Statistics")
167
+ col1, col2, col3, col4 = st.columns(4)
168
+ col1.metric("Total Products", stats['total_products'])
169
+ col1.metric("Included in Optimization", stats['included_products'], delta="Ready")
170
+ col2.metric("Total Demand", f"{stats['total_demand']:,}")
171
+ col2.metric("Excluded from Optimization", stats['excluded_products'], delta="Omitted")
172
+ col3.metric("Included Demand", f"{stats['included_demand']:,}", delta="Will be optimized")
173
+ col3.metric("UNICEF Staff Needed", stats['total_unicef_needed'])
174
+ col4.metric("Excluded Demand", f"{stats['excluded_demand']:,}", delta="Omitted")
175
+ col4.metric("Humanizer Staff Needed", stats['total_humanizer_needed'])
176
+
177
+ # ===== PRODUCT TYPE DISTRIBUTION =====
178
+ st.subheader("📈 Product Type Distribution")
179
+ if stats['type_counts']:
180
+ col1, col2 = st.columns(2)
181
+ with col1:
182
+ type_df = pd.DataFrame(list(stats['type_counts'].items()), columns=['Product Type', 'Count'])
183
+ st.bar_chart(type_df.set_index('Product Type'))
184
+ with col2:
185
+ for ptype, count in stats['type_counts'].items():
186
+ percentage = (count / stats['total_products']) * 100
187
+ st.write(f"**{ptype}:** {count} products ({percentage:.1f}%)")
188
+
189
+ # ===== DATA QUALITY ISSUES (for included products only) =====
190
+ st.subheader("⚠️ Data Quality Issues (Included Products)")
191
+ st.write("Issues affecting products that **will be** included in optimization:")
192
+ col1, col2, col3, col4 = st.columns(4)
193
+ col1.metric("No Line Assignment", stats['no_line_assignment'],
194
+ delta=None if stats['no_line_assignment'] == 0 else "Issue")
195
+ col2.metric("No Staffing Data", stats['no_staffing'],
196
+ delta=None if stats['no_staffing'] == 0 else "Issue")
197
+ col3.metric("No Speed Data", stats['no_speed'],
198
+ delta=None if stats['no_speed'] == 0 else "Will use default")
199
+ col4.metric("No Hierarchy Data", stats['no_hierarchy'],
200
+ delta=None if stats['no_hierarchy'] == 0 else "Issue")
201
+ col5.metric("Excluded: Too High Demand", stats['excluded_with_too_high_demand'],
202
+ delta=None if stats['excluded_with_too_high_demand'] == 0 else "Excluded")
203
+ # ===== INCLUDED PRODUCTS TABLE =====
204
+ included_df = validation_df[validation_df['Excluded from Optimization'] == False].copy()
205
+ excluded_df = validation_df[validation_df['Excluded from Optimization'] == True].copy()
206
+
207
+ st.subheader("✅ Products Included in Optimization")
208
+ st.write(f"**{len(included_df)} products** with total demand of **{included_df['Demand'].sum():,} units**")
209
+
210
+ if len(included_df) > 0:
211
+ # Filters
212
+ col1, col2 = st.columns(2)
213
+ type_filter = col1.selectbox("Filter by type", ["All"] + list(included_df['Product Type'].unique()), key="inc_filter")
214
+ min_demand = col2.number_input("Minimum demand", min_value=0, value=0, key="inc_demand")
215
+
216
+ # Apply filters
217
+ filtered = included_df.copy()
218
+ if type_filter != "All":
219
+ filtered = filtered[filtered['Product Type'] == type_filter]
220
+ if min_demand > 0:
221
+ filtered = filtered[filtered['Demand'] >= min_demand]
222
+
223
+ # Display table
224
+ display_cols = ['Product ID', 'Demand', 'Product Type', 'Line Type', 'UNICEF Staff',
225
+ 'Humanizer Staff', 'Production Speed (units/hour)', 'Data Quality Issues', 'Validation Status']
226
+ st.dataframe(filtered[display_cols], use_container_width=True, height=300)
227
+ else:
228
+ st.warning("No products are included in optimization!")
229
+
230
+ # ===== EXCLUDED PRODUCTS TABLE =====
231
+ st.subheader("🚫 Products Excluded from Optimization")
232
+ st.write(f"**{len(excluded_df)} products** with total demand of **{excluded_df['Demand'].sum():,} units**")
233
+ st.info("Excluded due to: missing line assignments, zero staffing, or non-standalone masters")
234
+
235
+ if len(excluded_df) > 0:
236
+ # Show exclusion breakdown
237
+ st.write("**Exclusion reasons:**")
238
+ for reason, count in excluded_df['Exclusion Reasons'].value_counts().items():
239
+ st.write(f"• {reason}: {count} products")
240
+
241
+ # Display table
242
+ display_cols = ['Product ID', 'Demand', 'Product Type', 'Exclusion Reasons',
243
+ 'UNICEF Staff', 'Humanizer Staff', 'Line Type']
244
+ st.dataframe(excluded_df[display_cols], use_container_width=True, height=200)
245
+
246
+ # Export button
247
+ st.download_button("📥 Export Validation Results to CSV", validation_df.to_csv(index=False),
+ file_name="demand_validation_results.csv", mime="text/csv")
250
+
251
+ # ===== RECOMMENDATIONS =====
252
+ st.subheader("💡 Recommendations")
253
+
254
+ if stats['excluded_products'] > 0:
255
+ st.warning(f"**{stats['excluded_products']} products** ({stats['excluded_demand']:,} units) excluded from optimization")
256
+
257
+ # Show data quality issues for included products
258
+ if stats['no_line_assignment'] > 0:
259
+ st.info(f"**Line Assignment**: {stats['no_line_assignment']} included products missing line assignments")
260
+ if stats['no_staffing'] > 0:
261
+ st.info(f"**Staffing Data**: {stats['no_staffing']} included products missing staffing requirements")
262
+ if stats['no_speed'] > 0:
263
+ st.info(f"**Speed Data**: {stats['no_speed']} included products missing speed data (will use default 106.7 units/hour)")
264
+ if stats['no_hierarchy'] > 0:
265
+ st.info(f"**Hierarchy Data**: {stats['no_hierarchy']} included products not in kit hierarchy")
266
+
267
+ # Overall status
268
+ if stats['included_products'] > 0:
269
+ st.success(f"✅ **{stats['included_products']} products** with {stats['included_demand']:,} units demand ready for optimization!")
270
+ if stats['no_speed'] == 0 and stats['no_hierarchy'] == 0:
271
+ st.info("🎉 All included products have complete data!")
272
+ else:
273
+ st.error("❌ No products passed filtering. Review exclusion reasons and check data configuration.")
274
+
275
+
276
+ if __name__ == "__main__":
277
+ # For testing
278
+ display_demand_validation()
src/models/__init__.py ADDED
@@ -0,0 +1 @@
1
+ # This file makes the models directory a Python package
src/models/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (159 Bytes). View file
 
src/models/__pycache__/optimizer_real.cpython-310.pyc ADDED
Binary file (19.6 kB). View file
 
src/models/optimizer_real.py ADDED
@@ -0,0 +1,780 @@
1
+ # ============================================================
2
+ # SD_roster_real - Fixed Team Production Planning (Option A)
3
+ # - Uses config-style variable names from src/config/optimization_config.py
4
+ # - Team per product (simultaneous): UNICEF Fixed term / Humanizer
5
+ # - Line types via numeric ids: 6 = long line, 7 = mini load
6
+ # - One product per (line, shift, day)
7
+ # - Weekly demand (across DATE_SPAN)
8
+ # ============================================================
9
+
10
+ from ortools.linear_solver import pywraplp
11
+ from math import ceil
12
+ import datetime
13
+ # ---- config import ----
+ # Import constants and other project modules directly
+ from src.config.constants import ShiftType, LineType, KitLevel, DefaultConfig
18
+ import src.preprocess.extract as extract
19
+ from src.preprocess.hierarchy_parser import sort_products_by_hierarchy
20
+
21
+ class Optimizer:
22
+ """Workforce optimization class that handles all configuration and optimization logic"""
23
+
24
+ def __init__(self):
25
+ """Initialize optimizer with session state configuration"""
26
+ self.load_session_state_config()
27
+ self.load_data()
28
+
29
+ def load_session_state_config(self):
30
+ """Load all configuration from session state"""
31
+ import streamlit as st
32
+ import datetime as dt
33
+
34
+ # Date configuration
35
+ self.start_date = st.session_state.start_date
36
+ self.planning_days = st.session_state.planning_days
37
+ self.start_datetime = dt.datetime.combine(self.start_date, dt.datetime.min.time())
38
+ self.end_date = self.start_datetime + dt.timedelta(days=self.planning_days - 1)
39
+ self.date_span = list(range(1, self.planning_days + 1))
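+ # Example: planning_days = 5 gives date_span = [1, 2, 3, 4, 5], i.e. day indices relative to start_date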
40
+
41
+ # Employee and shift configuration
42
+ self.employee_type_list = list(st.session_state.selected_employee_types)
43
+ self.active_shift_list = sorted(list(st.session_state.selected_shifts))
44
+
45
+ print("\n[DEBUG] From session_state.selected_employee_types:")
46
+ for emp in self.employee_type_list:
47
+ print(f" - '{emp}' (len={len(emp)}, repr={repr(emp)})")
48
+
49
+ # Working hours configuration
50
+ self.max_hour_per_person_per_day = st.session_state.max_hour_per_person_per_day
51
+ self.max_hours_shift = {
52
+ ShiftType.REGULAR: st.session_state.max_hours_shift_1,
53
+ ShiftType.EVENING: st.session_state.max_hours_shift_2,
54
+ ShiftType.OVERTIME: st.session_state.max_hours_shift_3
55
+ }
56
+
57
+ # Workforce limits
58
+ self.max_employee_per_type_on_day = st.session_state.max_employee_per_type_on_day
59
+
60
+ # Operations configuration
61
+ self.line_counts = st.session_state.line_counts
62
+ self.max_parallel_workers = {
63
+ LineType.LONG_LINE: st.session_state.max_parallel_workers_long_line,
64
+ LineType.MINI_LOAD: st.session_state.max_parallel_workers_mini_load
65
+ }
66
+
67
+ # Cost configuration
68
+ self.cost_list_per_emp_shift = st.session_state.cost_list_per_emp_shift
69
+
70
+ # Payment mode configuration
71
+ self.payment_mode_config = st.session_state.payment_mode_config
72
+
73
+ # Fixed staffing requirements
74
+ self.fixed_min_unicef_per_day = st.session_state.fixed_min_unicef_per_day
75
+
76
+ print("✅ Session state configuration loaded successfully")
77
+
78
+ def load_data(self):
79
+ """Load all required data from files"""
80
+ # Load hierarchy data
81
+ try:
82
+ kit_levels, dependencies, priority_order = extract.get_production_order_data()
83
+ self.kit_levels = kit_levels
84
+ self.kit_dependencies = dependencies
85
+ self.production_priority_order = priority_order
86
+ except Exception:
87
+ self.kit_levels = {}
88
+ self.kit_dependencies = {}
89
+ self.production_priority_order = []
90
+
91
+ # Load kit line match data
92
+ try:
93
+ kit_line_match = extract.read_kit_line_match_data()
94
+ kit_line_match_dict = kit_line_match.set_index("kit_name")["line_type"].to_dict()
95
+
96
+ # Create line name to ID mapping
97
+ line_name_to_id = {
98
+ "long line": LineType.LONG_LINE,
99
+ "mini load": LineType.MINI_LOAD
100
+ }
101
+
102
+ # Convert line names to IDs
103
+ self.kit_line_match_dict = {}
104
+ for kit_name, line_name in kit_line_match_dict.items():
105
+ self.kit_line_match_dict[kit_name] = line_name_to_id.get(line_name.lower(), line_name)
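+ # e.g. a mapping entry {'<kit>': 'long line'} becomes {'<kit>': 6}; unmapped line names are passed through unchanged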
106
+ except Exception:
107
+ self.kit_line_match_dict = {}
108
+
109
+ # Load product and demand data
110
+ try:
111
+ from src.demand_filtering import DemandFilter
112
+ filter_instance = DemandFilter()
113
+ filter_instance.load_data(force_reload=True)
114
+ self.product_list = filter_instance.get_filtered_product_list()
115
+ self.demand_dictionary = filter_instance.get_filtered_demand_dictionary()
116
+ except Exception:
117
+ self.product_list = []
118
+ self.demand_dictionary = {}
119
+
120
+ # Load team requirements
121
+ try:
122
+ print("\n[DEBUG] Loading team requirements from Kits Calculation...")
123
+ kits_df = extract.read_personnel_requirement_data()
124
+ print(f"[DEBUG] Loaded kits_df with {len(kits_df)} rows")
125
+ print(f"[DEBUG] Columns: {list(kits_df.columns)}")
126
+
127
+ # Initialize team requirements dictionary
128
+ self.team_req_per_product = {
129
+ "UNICEF Fixed term": {},
130
+ "Humanizer": {}
131
+ }
132
+
133
+ # Process each product in the product list
134
+ for product in self.product_list:
135
+ product_data = kits_df[kits_df['Kit'] == product]
136
+ if not product_data.empty:
137
+ # Extract Humanizer and UNICEF staff requirements
138
+ humanizer_req = product_data["Humanizer"].iloc[0]
139
+ unicef_req = product_data["UNICEF staff"].iloc[0]
140
+
141
+ # Convert to int (data is already cleaned in extract function)
142
+ self.team_req_per_product["Humanizer"][product] = int(humanizer_req)
143
+ self.team_req_per_product["UNICEF Fixed term"][product] = int(unicef_req)
144
+ else:
145
+ print(f"[WARN] Product {product} not found in Kits Calculation, setting requirements to 0")
146
+ self.team_req_per_product["Humanizer"][product] = 0
147
+ self.team_req_per_product["UNICEF Fixed term"][product] = 0
148
+
149
+ print(f"\n[DEBUG] team_req_per_product keys after loading:")
150
+ for key in self.team_req_per_product.keys():
151
+ product_count = len(self.team_req_per_product[key])
152
+ print(f" - '{key}' (len={len(key)}, {product_count} products)")
153
+
154
+ except Exception as e:
155
+ print(f"[ERROR] Failed to load team requirements: {e}")
156
+ import traceback
157
+ traceback.print_exc()
158
+ self.team_req_per_product = {}
159
+
160
+ # Load product speed data
161
+ try:
162
+ self.per_product_speed = extract.read_package_speed_data()
163
+ except Exception:
164
+ self.per_product_speed = {}
165
+
166
+ print("✅ All data loaded successfully")
167
+
168
+ def build_lines(self):
169
+ """Build line instances from session state configuration"""
170
+ line_tuples = []
171
+
172
+ try:
173
+ import streamlit as st
174
+ # Get selected line types from Data Selection tab
175
+ selected_lines = st.session_state.selected_lines
176
+ # Get line counts from Operations tab
177
+ line_counts = st.session_state.line_counts
178
+
179
+ print(f"Using lines from session state - selected: {selected_lines}, counts: {line_counts}")
180
+ for line_type in selected_lines:
181
+ count = line_counts.get(line_type, 0)
182
+ for i in range(1, count + 1):
183
+ line_tuples.append((line_type, i))
184
+
185
+ return line_tuples
186
+
187
+ except Exception as e:
188
+ print(f"Could not get line config from session state: {e}")
189
+ # Fallback: Use default values
190
+ print("Falling back to default line configuration")
191
+ default_selected_lines = [LineType.LONG_LINE, LineType.MINI_LOAD]
192
+ default_line_counts = {
193
+ LineType.LONG_LINE: DefaultConfig.LINE_COUNT_LONG_LINE,
194
+ LineType.MINI_LOAD: DefaultConfig.LINE_COUNT_MINI_LOAD
195
+ }
196
+
197
+ for line_type in default_selected_lines:
198
+ count = default_line_counts.get(line_type, 0)
199
+ for i in range(1, count + 1):
200
+ line_tuples.append((line_type, i))
201
+
202
+ return line_tuples
203
+
204
+ def run_optimization(self):
205
+ """Run the main optimization algorithm"""
206
+ # *** CRITICAL: Load fresh data to reflect current Streamlit configs ***
207
+ print("\n" + "="*60)
208
+ print("🔄 LOADING FRESH DATA FOR OPTIMIZATION")
209
+ print("="*60)
210
+
211
+ print(f"📦 LOADED PRODUCTS: {len(self.product_list)} products")
212
+ print(f"📈 LOADED DEMAND: {sum(self.demand_dictionary.values())} total units")
213
+ print(f"👥 LOADED TEAM REQUIREMENTS: {len(self.team_req_per_product)} employee types")
214
+
215
+ # Debug: Print team requirements keys
216
+ print("\n[DEBUG] team_req_per_product employee types:")
217
+ for emp_type in self.team_req_per_product.keys():
218
+ print(f" - '{emp_type}'")
219
+
220
+ print("\n[DEBUG] self.employee_type_list:")
221
+ for emp_type in self.employee_type_list:
222
+ print(f" - '{emp_type}'")
223
+
224
+ # Build ACTIVE schedule for fresh product list
225
+ ACTIVE = {t: {p: 1 for p in self.product_list} for t in self.date_span}
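+ # ACTIVE[t][p] = 1 marks product p as schedulable on day t; here every product is active on every day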
226
+
227
+ # --- Sets ---
228
+ date_span_list = list(self.date_span)
229
+ employee_type_list = self.employee_type_list
230
+ active_shift_list = self.active_shift_list
231
+ print(f"\n[DEBUG] employee_type_list: {employee_type_list}")
232
+ print(f"[DEBUG] active_shift_list: {active_shift_list}")
233
+
234
+ # *** HIERARCHY SORTING: Sort products by production priority ***
235
+ print("\n" + "="*60)
236
+ print("🔗 APPLYING HIERARCHY-BASED PRODUCTION ORDERING")
237
+ print("="*60)
238
+ sorted_product_list = sort_products_by_hierarchy(list(self.product_list), self.kit_levels, self.kit_dependencies)
239
+
240
+ line_tuples = self.build_lines()
241
+ print("Lines", line_tuples)
242
+
243
+ print("PER_PRODUCT_SPEED", self.per_product_speed)
244
+
245
+ # --- Short aliases for parameters ---
246
+ print("\n[DEBUG] Creating variable aliases...")
247
+ Hmax_s = dict(self.max_hours_shift) # per-shift hours
248
+ Hmax_daily = self.max_hour_per_person_per_day
249
+ max_workers_line = dict(self.max_parallel_workers) # per line type
250
+ max_employee_type_day = self.max_employee_per_type_on_day # {emp_type:{t:headcount}}
251
+ cost = self.cost_list_per_emp_shift # {emp_type:{shift:cost}}
252
+
253
+ # Create aliases for data dictionaries
254
+ TEAM_REQ_PER_PRODUCT = self.team_req_per_product
255
+ DEMAND_DICTIONARY = self.demand_dictionary
256
+ KIT_LINE_MATCH_DICT = self.kit_line_match_dict
257
+ KIT_LEVELS = self.kit_levels
258
+ KIT_DEPENDENCIES = self.kit_dependencies
259
+ PER_PRODUCT_SPEED = self.per_product_speed
260
+ FIXED_MIN_UNICEF_PER_DAY = self.fixed_min_unicef_per_day
261
+ PAYMENT_MODE_CONFIG = self.payment_mode_config
262
+
263
+ # Mock missing config variables (if they exist in config, they'll be overridden)
264
+ EVENING_SHIFT_MODE = "normal"
265
+ EVENING_SHIFT_DEMAND_THRESHOLD = 0.9
266
+
267
+ print(f"[DEBUG] TEAM_REQ_PER_PRODUCT has {len(TEAM_REQ_PER_PRODUCT)} employee types")
268
+ print(f"[DEBUG] employee_type_list has {len(employee_type_list)} types")
269
+
270
+ # --- Feasibility quick checks ---
271
+ print("\n[DEBUG] Starting feasibility checks...")
272
+
273
+ # 1) If team size is greater than max_workers_line, block the product-line type combination
274
+ for i, p in enumerate(sorted_product_list):
275
+ print(f"[DEBUG] Checking product {i+1}/{len(sorted_product_list)}: {p}")
276
+
277
+ # Check if all employee types exist in TEAM_REQ_PER_PRODUCT
278
+ for e in employee_type_list:
279
+ if e not in TEAM_REQ_PER_PRODUCT:
280
+ print(f"[ERROR] Employee type '{e}' not found in TEAM_REQ_PER_PRODUCT!")
281
+ print(f"[ERROR] Available keys: {list(TEAM_REQ_PER_PRODUCT.keys())}")
282
+ raise KeyError(f"Employee type '{e}' not in team requirements data")
283
+ if p not in TEAM_REQ_PER_PRODUCT[e]:
284
+ print(f"[ERROR] Product '{p}' not found in TEAM_REQ_PER_PRODUCT['{e}']!")
285
+ raise KeyError(f"Product '{p}' not in team requirements for employee type '{e}'")
286
+
287
+ req_total = sum(TEAM_REQ_PER_PRODUCT[e][p] for e in employee_type_list)
288
+ print(f"[DEBUG] req_total: {req_total}")
289
+ lt = KIT_LINE_MATCH_DICT.get(p, LineType.LONG_LINE) # Default to long line (6) if not found
290
+ if p not in KIT_LINE_MATCH_DICT:
291
+ print(f"[WARN] Product {p}: No line type mapping found, defaulting to long line (6)")
292
+ if req_total > max_workers_line.get(lt, 1e9):
293
+ print(f"[WARN] Product {p}: team size {req_total} > MAX_PARALLEL_WORKERS[{lt}] "
294
+ f"= {max_workers_line.get(lt)}. Blocked.")
295
+
296
+ # 2) Check if demand can be met without evening shift (only if in normal mode)
297
+ if EVENING_SHIFT_MODE == "normal":
298
+ total_demand = sum(DEMAND_DICTIONARY.get(p, 0) for p in sorted_product_list)
299
+
300
+ # Calculate maximum capacity with regular + overtime shifts only
301
+ regular_overtime_shifts = [s for s in active_shift_list if s in ShiftType.REGULAR_AND_OVERTIME]
302
+ max_capacity = 0
303
+
304
+ for p in sorted_product_list:
305
+ if p in PER_PRODUCT_SPEED:
306
+ product_speed = PER_PRODUCT_SPEED[p] # units per hour
307
+ # Calculate max hours available for this product across all lines and shifts
308
+ max_hours_per_product = 0
309
+ for ell in line_tuples:
310
+ for s in regular_overtime_shifts:
311
+ for t in date_span_list:
312
+ max_hours_per_product += Hmax_s[s]
313
+
314
+ max_capacity += product_speed * max_hours_per_product
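+ # Note: this is an optimistic upper bound - it assumes every line/shift/day could run this product at full speed, ignoring staffing limits and the one-product-per-line rule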
315
+
316
+ capacity_ratio = max_capacity / total_demand if total_demand > 0 else float('inf')
317
+
318
+ print(f"[CAPACITY CHECK] Total demand: {total_demand}")
319
+ print(f"[CAPACITY CHECK] Max capacity (Regular + Overtime): {max_capacity:.1f}")
320
+ print(f"[CAPACITY CHECK] Capacity ratio: {capacity_ratio:.2f}")
321
+
322
+ if capacity_ratio < EVENING_SHIFT_DEMAND_THRESHOLD:
323
+ print(f"\n🚨 [ALERT] DEMAND TOO HIGH!")
324
+ print(f" Current capacity can only meet {capacity_ratio*100:.1f}% of demand")
325
+ print(f" Threshold: {EVENING_SHIFT_DEMAND_THRESHOLD*100:.1f}%")
326
+ print(f" RECOMMENDATION: Change EVENING_SHIFT_MODE to 'activate_evening' to enable evening shift")
327
+ print(f" This will add shift 3 to increase capacity\n")
328
+
329
+
330
+ # --- Solver ---
331
+ solver = pywraplp.Solver.CreateSolver('CBC')
332
+ if not solver:
333
+ raise RuntimeError("CBC solver not found.")
334
+ INF = solver.infinity()
335
+
336
+ # --- Variables ---
337
+ # Assignment[p,ell,s,t] ∈ {0,1}: 1 if product p runs on (line,shift,day)
338
+ Assignment, Hours, Units = {}, {}, {} # Hours: run hours, Units: production units
339
+ for p in sorted_product_list:
340
+ for ell in line_tuples: # ell = (line_type_id, idx)
341
+ for s in active_shift_list:
342
+ for t in date_span_list:
343
+ #Is product p assigned to run on line ell, during shift s, on day t?
344
+ Assignment[p, ell, s, t] = solver.BoolVar(f"Z_{p}_{ell[0]}_{ell[1]}_s{s}_d{t}")
345
+ #How many hours does product p run on line ell, during shift s, on day t?
346
+ Hours[p, ell, s, t] = solver.NumVar(0, Hmax_s[s], f"T_{p}_{ell[0]}_{ell[1]}_s{s}_d{t}")
347
+ #How many units does product p run on line ell, during shift s, on day t?
348
+ Units[p, ell, s, t] = solver.NumVar(0, INF, f"U_{p}_{ell[0]}_{ell[1]}_s{s}_d{t}")
349
+
350
+ # Note: IDLE variables removed - we only track employees actually working on production
351
+
352
+ # Variable to track actual number of employees of each type working each shift each day
353
+ # This represents how many distinct employees of type e are working in shift s on day t
354
+ EMPLOYEE_COUNT = {}
355
+ for e in employee_type_list:
356
+ for s in active_shift_list:
357
+ for t in date_span_list:
358
+ # Note: Minimum staffing is per day, not per shift
359
+ # We'll handle the daily minimum constraint separately
360
+ max_count = max_employee_type_day.get(e, {}).get(t, 100)
361
+ EMPLOYEE_COUNT[e, s, t] = solver.IntVar(
362
+ 0, # No minimum per shift (daily minimum handled separately)
363
+ max_count,
364
+ f"EmpCount_{e}_s{s}_day{t}"
365
+ )
366
+
367
+ # Track total person-hours worked by each employee type per shift per day
368
+ # This is needed for employee-centric wage calculation
369
+ EMPLOYEE_HOURS = {}
370
+ for e in employee_type_list:
371
+ for s in active_shift_list:
372
+ for t in date_span_list:
373
+ # Sum of all work hours for employee type e in shift s on day t
374
+ # This represents total person-hours (e.g., 5 employees × 8 hours = 40 person-hours)
375
+ EMPLOYEE_HOURS[e, s, t] = solver.Sum(
376
+ TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t]
377
+ for p in sorted_product_list
378
+ for ell in line_tuples
379
+ )
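+ # Example: a product needing 2 UNICEF staff that runs 6 hours contributes 12 person-hours to this sum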
380
+
381
+ # Note: Binary variables for bulk payment are now created inline in the cost calculation
382
+
383
+ # --- Objective: Minimize total labor cost (wages) ---
384
+ # Employee-centric approach: calculate wages based on actual employees and their hours
385
+ print(f"\n[DEBUG] Payment mode configuration: {PAYMENT_MODE_CONFIG}")
386
+
387
+ # Build cost terms based on payment mode
388
+ cost_terms = []
389
+
390
+ for e in employee_type_list:
391
+ for s in active_shift_list:
392
+ for t in date_span_list:
393
+ payment_mode = PAYMENT_MODE_CONFIG.get(s, "partial") # Default to partial if not specified
394
+
395
+ if payment_mode == "partial":
396
+ # Partial payment: pay for actual person-hours worked
397
+ # Cost = hourly_rate × total_person_hours
398
+ # Example: $20/hr × 40 person-hours = $800
399
+ cost_terms.append(cost[e][s] * EMPLOYEE_HOURS[e, s, t])
400
+
401
+ elif payment_mode == "bulk":
402
+ # Bulk payment: if ANY work happens in shift, pay ALL working employees for FULL shift
403
+ # We need to know: did employee type e work at all in shift s on day t?
404
+
405
+ # Create binary: 1 if employee type e worked in this shift
406
+ work_in_shift = solver.BoolVar(f"work_{e}_s{s}_d{t}")
407
+
408
+ # Link binary to work hours
409
+ # If EMPLOYEE_HOURS > 0, then work_in_shift = 1
410
+ # If EMPLOYEE_HOURS = 0, then work_in_shift = 0
411
+ max_possible_hours = Hmax_s[s] * max_employee_type_day[e][t]
412
+ solver.Add(EMPLOYEE_HOURS[e, s, t] <= max_possible_hours * work_in_shift)
413
+ solver.Add(work_in_shift * 0.001 <= EMPLOYEE_HOURS[e, s, t])
414
+
415
+ # Calculate number of employees working in this shift
416
+ # This is approximately: ceil(EMPLOYEE_HOURS / Hmax_s[s])
417
+ # But we can use: employees_working_in_shift
418
+ # For simplicity, use EMPLOYEE_HOURS / Hmax_s[s] as continuous approximation
419
+ # Or better: create a variable for employees per shift
420
+
421
+ # Simpler approach: For bulk payment, assume if work happens,
422
+ # we need approximately EMPLOYEE_HOURS/Hmax_s[s] employees,
423
+ # and each gets paid for full shift
424
+ # Cost ≈ (EMPLOYEE_HOURS / Hmax_s[s]) × Hmax_s[s] × hourly_rate = EMPLOYEE_HOURS × hourly_rate
425
+ # But that's the same as partial! The difference is we round up employees.
426
+
427
+ # Better approach: Create variable for employees working in this specific shift
428
+ employees_in_shift = solver.IntVar(0, max_employee_type_day[e][t], f"emp_{e}_s{s}_d{t}")
429
+
430
+ # Link employees_in_shift to work requirements
431
+ # If EMPLOYEE_HOURS requires N employees, then employees_in_shift >= ceil(N)
432
+ solver.Add(employees_in_shift * Hmax_s[s] >= EMPLOYEE_HOURS[e, s, t])
433
+
434
+ # Cost: pay each employee for full shift
435
+ cost_terms.append(cost[e][s] * Hmax_s[s] * employees_in_shift)
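+ # Illustrative numbers: 18 person-hours with Hmax_s[s] = 7.5 forces employees_in_shift >= 3 (18/7.5 = 2.4, rounded up by integrality), so the bulk cost is rate x 7.5 x 3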
436
+
437
+ # Note: No idle employee costs - only pay for employees actually working
438
+
439
+ total_cost = solver.Sum(cost_terms)
440
+
441
+ # Objective: minimize total labor cost (wages)
442
+ # This finds the optimal production schedule (product order, line assignment, timing)
443
+ # that minimizes total wages while meeting all demand and capacity constraints
444
+ solver.Minimize(total_cost)
445
+
446
+ # --- Constraints ---
447
+
448
+ # 1) Weekly demand - must meet exactly (no over/under production)
449
+ for p in sorted_product_list:
450
+ total_production = solver.Sum(Units[p, ell, s, t] for ell in line_tuples for s in active_shift_list for t in date_span_list)
451
+ demand = DEMAND_DICTIONARY.get(p, 0)
452
+
453
+ # Must produce at least the demand
454
+ solver.Add(total_production >= demand)
455
+
456
+ # Must not produce more than the demand (prevent overproduction)
457
+ solver.Add(total_production <= demand)
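+ # Together the two inequalities pin total_production == demand exactly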
458
+
459
+ # 2) One product per (line,shift,day) + time gating
460
+ for ell in line_tuples:
461
+ for s in active_shift_list:
462
+ for t in date_span_list:
463
+ solver.Add(solver.Sum(Assignment[p, ell, s, t] for p in sorted_product_list) <= 1)
464
+ for p in sorted_product_list:
465
+ solver.Add(Hours[p, ell, s, t] <= Hmax_s[s] * Assignment[p, ell, s, t])
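+ # If Assignment is 0 the run hours collapse to 0, so only the single assigned product can consume time on that (line, shift, day)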
466
+
467
+ # 3) Product-line type compatibility + (optional) activity by day
468
+ for p in sorted_product_list:
469
+ req_lt = KIT_LINE_MATCH_DICT.get(p, LineType.LONG_LINE) # Default to long line if not found
470
+ req_total = sum(TEAM_REQ_PER_PRODUCT[e][p] for e in employee_type_list)
471
+ for ell in line_tuples:
472
+ allowed = (ell[0] == req_lt) and (req_total <= max_workers_line.get(ell[0], 1e9))
473
+ for s in active_shift_list:
474
+ for t in date_span_list:
475
+ if ACTIVE[t][p] == 0 or not allowed:
476
+ solver.Add(Assignment[p, ell, s, t] == 0)
477
+ solver.Add(Hours[p, ell, s, t] == 0)
478
+ solver.Add(Units[p, ell, s, t] == 0)
479
+
480
+ # 4) Line throughput: Units ≤ product_speed * Hours
481
+ for p in sorted_product_list:
482
+ for ell in line_tuples:
483
+ for s in active_shift_list:
484
+ for t in date_span_list:
485
+ # Get product speed (same speed regardless of line type)
486
+ if p in PER_PRODUCT_SPEED:
487
+ # PER_PRODUCT_SPEED already holds units per hour (kits per day / paid work hours per day)
488
+ speed = PER_PRODUCT_SPEED[p]
489
+ # Upper bound: units cannot exceed capacity
490
+ solver.Add(
491
+ Units[p, ell, s, t] <= speed * Hours[p, ell, s, t]
492
+ )
493
+ # Lower bound: if working, must produce (prevent phantom work)
494
+ solver.Add(
495
+ Units[p, ell, s, t] >= speed * Hours[p, ell, s, t]
496
+ )
497
+ else:
498
+ # Default speed if not found
499
+ default_speed = 800 / 7.5 # units per hour
500
+ print(f"Warning: No speed data for product {p}, using default {default_speed:.1f} per hour")
501
+ # Upper bound: units cannot exceed capacity
502
+ solver.Add(
503
+ Units[p, ell, s, t] <= default_speed * Hours[p, ell, s, t]
504
+ )
505
+ # Lower bound: if working, must produce (prevent phantom work)
506
+ solver.Add(
507
+ Units[p, ell, s, t] >= default_speed * Hours[p, ell, s, t]
508
+ )
509
+
510
+ # Working hours constraint: active employees cannot exceed shift hour capacity
511
+ for e in employee_type_list:
512
+ for s in active_shift_list:
513
+ for t in date_span_list:
514
+ # No idle employee constraints - employees are only counted when working
515
+ solver.Add(
516
+ solver.Sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t] for p in sorted_product_list for ell in line_tuples)
517
+ <= Hmax_s[s] * max_employee_type_day[e][t]
518
+ )
519
+
520
+ # 6) Per-shift staffing capacity by type: link employee count to actual work hours
521
+ # This constraint ensures EMPLOYEE_COUNT[e,s,t] represents the actual number of employees needed in each shift
522
+ for e in employee_type_list:
523
+ for s in active_shift_list:
524
+ for t in date_span_list:
525
+ # Total person-hours worked by employee type e in shift s on day t
526
+ total_person_hours_in_shift = solver.Sum(
527
+ TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t]
528
+ for p in sorted_product_list
529
+ for ell in line_tuples
530
+ )
531
+
532
+ # Employee count must be sufficient to cover the work in this shift
533
+ # If employees work H person-hours total and each can work max M hours/shift,
534
+ # then we need at least ceil(H/M) employees
535
+ # Constraint: employee_count × max_hours_per_shift >= total_person_hours_in_shift
536
+ solver.Add(EMPLOYEE_COUNT[e, s, t] * Hmax_s[s] >= total_person_hours_in_shift)
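+ # Example: 20 person-hours in a 7.5 h shift forces EMPLOYEE_COUNT >= 20/7.5 = 2.67, i.e. at least 3 since the variable is integer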
537
+
538
+ # 7) Shift ordering constraints (only apply if shifts are available)
539
+ # Evening shift after regular shift
540
+ if ShiftType.EVENING in active_shift_list and ShiftType.REGULAR in active_shift_list: # Only if both shifts are available
541
+ for e in employee_type_list:
542
+ for t in date_span_list:
543
+ solver.Add(
544
+ solver.Sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, ShiftType.EVENING, t] for p in sorted_product_list for ell in line_tuples)
545
+ <=
546
+ solver.Sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, ShiftType.REGULAR, t] for p in sorted_product_list for ell in line_tuples)
547
+ )
548
+
549
+ # Overtime should only be used when regular shift is at capacity
550
+ if ShiftType.OVERTIME in active_shift_list and ShiftType.REGULAR in active_shift_list: # Only if both shifts are available
551
+ print("\n[OVERTIME] Adding constraints to ensure overtime only when regular shift is insufficient...")
552
+
553
+ for e in employee_type_list:
554
+ for t in date_span_list:
555
+ # Get available regular capacity for this employee type and day
556
+ regular_capacity = max_employee_type_day[e][t]
557
+
558
+ # Total regular shift usage for this employee type and day
559
+ regular_usage = solver.Sum(
560
+ TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, ShiftType.REGULAR, t]
561
+ for p in sorted_product_list for ell in line_tuples
562
+ )
563
+
564
+ # Total overtime usage for this employee type and day
565
+ overtime_usage = solver.Sum(
566
+ TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, ShiftType.OVERTIME, t]
567
+ for p in sorted_product_list for ell in line_tuples
568
+ )
569
+
570
+ # Create binary variable: 1 if using overtime, 0 otherwise
571
+ using_overtime = solver.IntVar(0, 1, f'using_overtime_{e}_{t}')
572
+
573
+ # If using overtime, regular capacity must be utilized significantly.
+ # Note: regular_usage and overtime_usage are in person-hours, so the
+ # 90% threshold is expressed in person-hours (headcount x shift hours).
+ min_regular_for_overtime = 0.9 * regular_capacity * Hmax_s[ShiftType.REGULAR]
+
+ # Constraint 1: Can only use overtime if regular usage is high
+ solver.Add(regular_usage >= min_regular_for_overtime * using_overtime)
+
+ # Constraint 2: If any overtime is used, force the binary on (big-M link)
+ solver.Add(overtime_usage <= regular_capacity * Hmax_s[ShiftType.OVERTIME] * using_overtime)
582
+
583
+ overtime_constraints_added = len(employee_type_list) * len(date_span_list) * 2 # 2 constraints per employee type per day
584
+ print(f"[OVERTIME] Added {overtime_constraints_added} constraints ensuring overtime only when regular shifts are at 90%+ capacity")
585
+
586
+ # 7.5) Bulk payment linking constraints are now handled inline in the cost calculation
587
+
588
+ # 7.6) *** FIXED MINIMUM UNICEF EMPLOYEES CONSTRAINT ***
589
+ # Ensure minimum UNICEF fixed-term staff work in the REGULAR shift every day
590
+ # The minimum applies to the regular shift specifically (not overtime or evening)
591
+ if 'UNICEF Fixed term' in employee_type_list and FIXED_MIN_UNICEF_PER_DAY > 0:
592
+ if ShiftType.REGULAR in active_shift_list:
593
+ print(f"\n[FIXED STAFFING] Adding constraint for minimum {FIXED_MIN_UNICEF_PER_DAY} UNICEF employees in REGULAR shift per day...")
594
+ for t in date_span_list:
595
+ # At least FIXED_MIN_UNICEF_PER_DAY employees must work in the regular shift each day
596
+ solver.Add(
597
+ EMPLOYEE_COUNT['UNICEF Fixed term', ShiftType.REGULAR, t] >= FIXED_MIN_UNICEF_PER_DAY
598
+ )
599
+ print(f"[FIXED STAFFING] Added {len(date_span_list)} constraints ensuring >= {FIXED_MIN_UNICEF_PER_DAY} UNICEF employees in regular shift per day")
600
+ else:
601
+ print(f"\n[FIXED STAFFING] Warning: Regular shift not available, cannot enforce minimum UNICEF staffing")
602
+
603
+ # 8) *** HIERARCHY DEPENDENCY CONSTRAINTS ***
604
+ # For subkits with prepack dependencies: dependencies should be produced before or same time
605
+ print("\n[HIERARCHY] Adding dependency constraints...")
606
+ dependency_constraints_added = 0
607
+
608
+ for p in sorted_product_list:
609
+ dependencies = KIT_DEPENDENCIES.get(p, [])
610
+ if dependencies:
611
+ # Get the level of the current product
612
+ p_level = KIT_LEVELS.get(p, 2)
613
+
614
+ for dep in dependencies:
615
+ if dep in sorted_product_list: # Only if dependency is also in production list
616
+ # Calculate "completion time" for each product (sum of all production times)
617
+ p_completion = solver.Sum(
618
+ t * Hours[p, ell, s, t] for ell in line_tuples for s in active_shift_list for t in date_span_list
619
+ )
620
+ dep_completion = solver.Sum(
621
+ t * Hours[dep, ell, s, t] for ell in line_tuples for s in active_shift_list for t in date_span_list
622
+ )
623
+
624
+ # Dependency should complete before or at the same time
625
+ solver.Add(dep_completion <= p_completion)
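+ # Note: sum(t * Hours) is an hour-weighted day index, so this orders the dependency's production no later than its parent's on average rather than enforcing strict day-by-day precedence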
626
+ dependency_constraints_added += 1
627
+
628
+ print(f" Added constraint: {dep} (dependency) <= {p} (level {p_level})")
629
+
630
+ print(f"[HIERARCHY] Added {dependency_constraints_added} dependency constraints")
631
+
632
+ # --- Solve ---
633
+ status = solver.Solve()
634
+ if status != pywraplp.Solver.OPTIMAL:
635
+ status_names = {pywraplp.Solver.INFEASIBLE: "INFEASIBLE", pywraplp.Solver.UNBOUNDED: "UNBOUNDED"}
636
+ print(f"No optimal solution. Status: {status} ({status_names.get(status, 'UNKNOWN')})")
637
+ # Debug hint:
638
+ # solver.EnableOutput()
639
+ # solver.ExportModelAsLpFile("model.lp")
640
+ return None
641
+
642
+ # --- Report ---
643
+ result = {}
644
+ result['objective'] = solver.Objective().Value()
645
+
646
+ # Weekly production
647
+ prod_week = {p: sum(Units[p, ell, s, t].solution_value() for ell in line_tuples for s in active_shift_list for t in date_span_list) for p in sorted_product_list}
648
+ result['weekly_production'] = prod_week
649
+
650
+ # Which product ran on which line/shift/day
651
+ schedule = []
652
+ for t in date_span_list:
653
+ for ell in line_tuples:
654
+ for s in active_shift_list:
655
+ chosen = [p for p in sorted_product_list if Assignment[p, ell, s, t].solution_value() > 0.5]
656
+ if chosen:
657
+ p = chosen[0]
658
+ schedule.append({
659
+ 'day': t,
660
+ 'line_type_id': ell[0],
661
+ 'line_idx': ell[1],
662
+ 'shift': s,
663
+ 'product': p,
664
+ 'run_hours': Hours[p, ell, s, t].solution_value(),
665
+ 'units': Units[p, ell, s, t].solution_value(),
666
+ })
667
+ result['run_schedule'] = schedule
668
+
669
+ # Implied headcount by type/shift/day (ceil)
670
+ headcount = []
671
+ for e in employee_type_list:
672
+ for s in active_shift_list:
673
+ for t in date_span_list:
674
+ used_ph = sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t].solution_value() for p in sorted_product_list for ell in line_tuples)
675
+ need = ceil(used_ph / (Hmax_s[s] + 1e-9))
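+ # The 1e-9 keeps ceil() from rounding up when used_ph is an exact multiple of the shift length (floating-point noise)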
676
+ headcount.append({'emp_type': e, 'shift': s, 'day': t,
677
+ 'needed': need, 'available': max_employee_type_day[e][t]})
678
+ result['headcount_per_shift'] = headcount
679
+
680
+ # Total person-hours by type/day (≤ 14h * headcount)
681
+ ph_by_day = []
682
+ for e in employee_type_list:
683
+ for t in date_span_list:
684
+ used = sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t].solution_value() for s in active_shift_list for p in sorted_product_list for ell in line_tuples)
685
+ ph_by_day.append({'emp_type': e, 'day': t,
686
+ 'used_person_hours': used,
687
+ 'cap_person_hours': Hmax_daily * max_employee_type_day[e][t]})
688
+ result['person_hours_by_day'] = ph_by_day
689
+
690
+ # Actual employee count per type/shift/day (from EMPLOYEE_COUNT variable)
691
+ employee_count_by_shift = []
692
+ for e in employee_type_list:
693
+ for s in active_shift_list:
694
+ for t in date_span_list:
695
+ count = int(EMPLOYEE_COUNT[e, s, t].solution_value())
696
+ used_hours = sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t].solution_value()
697
+ for p in sorted_product_list for ell in line_tuples)
698
+ avg_hours_per_employee = used_hours / count if count > 0 else 0
699
+ if count > 0: # Only add entries where employees are working
700
+ employee_count_by_shift.append({
701
+ 'emp_type': e,
702
+ 'shift': s,
703
+ 'day': t,
704
+ 'employee_count': count,
705
+ 'total_person_hours': used_hours,
706
+ 'avg_hours_per_employee': avg_hours_per_employee,
707
+ 'available': max_employee_type_day[e][t]
708
+ })
709
+ result['employee_count_by_shift'] = employee_count_by_shift
710
+
711
+ # Also calculate daily totals (summing across shifts)
712
+ employee_count_by_day = []
713
+ for e in employee_type_list:
714
+ for t in date_span_list:
715
+ # Sum employees across all shifts for this day
716
+ total_count = sum(int(EMPLOYEE_COUNT[e, s, t].solution_value()) for s in active_shift_list)
717
+ used_hours = sum(TEAM_REQ_PER_PRODUCT[e][p] * Hours[p, ell, s, t].solution_value()
718
+ for s in active_shift_list for p in sorted_product_list for ell in line_tuples)
719
+ avg_hours_per_employee = used_hours / total_count if total_count > 0 else 0
720
+ if total_count > 0: # Only add days where employees are working
721
+ employee_count_by_day.append({
722
+ 'emp_type': e,
723
+ 'day': t,
724
+ 'employee_count': total_count,
725
+ 'total_person_hours': used_hours,
726
+ 'avg_hours_per_employee': avg_hours_per_employee,
727
+ 'available': max_employee_type_day[e][t]
728
+ })
729
+ result['employee_count_by_day'] = employee_count_by_day
730
+
731
+ # Note: Idle employee tracking removed - only counting employees actually working
732
+
733
+ # Pretty print
734
+ print("Objective (min cost):", result['objective'])
735
+ print("\n--- Weekly production by product ---")
736
+ for p, u in prod_week.items():
737
+ print(f"{p}: {u:.1f} / demand {DEMAND_DICTIONARY.get(p,0)}")
738
+
739
+ print("\n--- Schedule (line, shift, day) ---")
740
+ for row in schedule:
741
+ shift_name = ShiftType.get_name(row['shift'])
742
+ line_name = LineType.get_name(row['line_type_id'])
743
+ print(f"date_span_list{row['day']} {line_name}-{row['line_idx']} {shift_name}: "
744
+ f"{row['product']} Hours={row['run_hours']:.2f}h Units={row['units']:.1f}")
745
+
746
+ print("\n--- Implied headcount need (per type/shift/day) ---")
747
+ for row in headcount:
748
+ shift_name = ShiftType.get_name(row['shift'])
749
+ print(f"{row['emp_type']}, {shift_name}, date_span_list{row['day']}: "
750
+ f"need={row['needed']} (avail {row['available']})")
751
+
752
+ print("\n--- Total person-hours by type/day ---")
753
+ for row in ph_by_day:
754
+ print(f"{row['emp_type']}, date_span_list{row['day']}: used={row['used_person_hours']:.1f} "
755
+ f"(cap {row['cap_person_hours']})")
756
+
757
+ print("\n--- Actual employee count by type/shift/day ---")
758
+ for row in employee_count_by_shift:
759
+ shift_name = ShiftType.get_name(row['shift'])
760
+ print(f"{row['emp_type']}, {shift_name}, date_span_list{row['day']}: "
761
+ f"count={row['employee_count']} employees, "
762
+ f"total_hours={row['total_person_hours']:.1f}h, "
763
+ f"avg={row['avg_hours_per_employee']:.1f}h/employee")
764
+
765
+ print("\n--- Daily employee totals by type/day (sum across shifts) ---")
766
+ for row in employee_count_by_day:
767
+ print(f"{row['emp_type']}, date_span_list{row['day']}: "
768
+ f"count={row['employee_count']} employees total, "
769
+ f"total_hours={row['total_person_hours']:.1f}h, "
770
+ f"avg={row['avg_hours_per_employee']:.1f}h/employee "
771
+ f"(available: {row['available']})")
772
+
773
+ # Note: Idle employee reporting removed - only tracking employees actually working
774
+
775
+ return result
776
+
777
+
778
+ if __name__ == "__main__":
779
+ optimizer = Optimizer()
780
+ optimizer.run_optimization()
src/preprocess/__init__.py ADDED
File without changes
src/preprocess/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (163 Bytes). View file
 
src/preprocess/__pycache__/extract.cpython-310.pyc ADDED
Binary file (5.59 kB). View file
 
src/preprocess/__pycache__/hierarchy_parser.cpython-310.pyc ADDED
Binary file (6.49 kB). View file
 
src/preprocess/__pycache__/transform.cpython-310.pyc ADDED
Binary file (2.46 kB). View file
 
src/preprocess/data_preprocess.py ADDED
@@ -0,0 +1,39 @@
1
+ import pandas as pd
2
+
3
+
4
+ def process_Kit_Composition_and_relation(output_csv_path: str = 'data/real_data_excel/converted_csv/Kit_Composition_and_relation_cleaned_with_line_type_and_id.csv') -> pd.DataFrame:
5
+ """
6
+ Process the Kit_Composition_and_relation.csv file to clean the data and add line type and id.
7
+
8
+ Returns:
9
+ saves to csv path
10
+ cleaned_df: pd.DataFrame
11
+ """
12
+ df = pd.read_csv('data/real_data_excel/converted_csv/Kit_Composition_and_relation.csv')
13
+ # df.dropna(inplace=True)
14
+ master = df[["Master Kit", "Master Kit Description"]]
15
+ master["kit_type"] = "master"
16
+ master.rename(columns={"Master Kit": "kit_name", "Master Kit Description": "kit_description"}, inplace=True)
17
+
18
+ subkit = df[["Sub kit", "Sub kit description"]]
19
+ subkit["kit_type"] = "subkit"
20
+ subkit.rename(columns={"Sub kit": "kit_name", "Sub kit Description": "kit_description"}, inplace=True)
21
+ subkit.columns = ["kit_name", "kit_description", "kit_type"]
22
+
23
+ prepack = df[["Prepack", "Prepack Description"]]
24
+ prepack["kit_type"] = "prepack"
25
+ prepack.rename(columns={"Prepack": "kit_name", "Prepack Description": "kit_description"}, inplace=True)
26
+
27
+
28
+ cleaned_df = pd.concat([master, subkit, prepack])
29
+ cleaned_df = cleaned_df[['kit_name','kit_description','kit_type']].drop_duplicates()
30
+ tmp = cleaned_df.groupby('kit_name').count()['kit_type'].reset_index()
31
+ standalone_masterkit_list = tmp.loc[tmp['kit_type']==1,'kit_name']
32
+
33
+ cleaned_df.loc[cleaned_df['kit_name'].isin(standalone_masterkit_list),'line_type'] = 'long line'
34
+ cleaned_df.loc[cleaned_df['kit_type']=='prepack','line_type'] = 'mini load'
35
+ cleaned_df.loc[cleaned_df['kit_type']=='subkit','line_type'] = 'long line'
36
+ cleaned_df.loc[cleaned_df['line_type']=='mini load', 'line_id'] = 7
37
+ cleaned_df.loc[cleaned_df['line_type']=='long line', 'line_id'] = 6
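+ # line_id follows the numeric convention used by the optimizer: 6 = long line, 7 = mini load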
38
+ cleaned_df.to_csv(output_csv_path, index=False)
39
+ return cleaned_df
src/preprocess/excel_to_csv_converter.py ADDED
@@ -0,0 +1,111 @@
1
+ import pandas as pd
2
+ import os
3
+ from pathlib import Path
4
+
5
+ class ExcelToCsvConverter:
6
+ """
7
+ Convert an Excel file to CSV files.
8
+ """
9
+
10
+ def __init__(self, excel_path, output_dir=None):
11
+ self.excel_path = excel_path
12
+ self.output_dir = output_dir
13
+
14
+
15
+ def convert_excel_to_csv(excel_path, output_dir=None):
16
+ """
17
+ Convert each sheet of an Excel file to a separate CSV file.
18
+
19
+ Args:
20
+ excel_path (str): Path to the Excel file
21
+ output_dir (str): Output directory for CSV files. If None, uses same directory as Excel file
22
+ """
23
+ try:
24
+ # Set up output directory
25
+ if output_dir is None:
26
+ output_dir = os.path.dirname(excel_path)
27
+
28
+ # Create output directory if it doesn't exist
29
+ Path(output_dir).mkdir(parents=True, exist_ok=True)
30
+
31
+ # Read Excel file
32
+ excel_file = pd.ExcelFile(excel_path)
33
+ converted_files = []
34
+
35
+ for i, sheet_name in enumerate(excel_file.sheet_names, 1):
36
+ # Read the sheet
37
+ df = pd.read_excel(excel_path, sheet_name=sheet_name)
38
+
39
+ # Create a safe filename for the CSV
40
+ safe_filename = "".join(c for c in sheet_name if c.isalnum() or c in (' ', '-', '_')).rstrip()
41
+ # Use the sheet name to build the CSV file name below
44
+ safe_filename = safe_filename.replace(' ', '_')
45
+ csv_filename = f"{safe_filename}.csv"
46
+ csv_path = os.path.join(output_dir, csv_filename)
47
+
48
+ # Save as CSV
49
+ df.to_csv(csv_path, index=False, encoding='utf-8')
50
+ converted_files.append(csv_path)
51
+
52
+ print(f"✅ {i}. '{sheet_name}' → {csv_filename}")
53
+ print(f" - Saved {len(df)} rows, {len(df.columns)} columns")
54
+
55
+ print(f"\n🎉 Successfully converted {len(converted_files)} sheets to CSV files!")
56
+ return converted_files
57
+
58
+ except Exception as e:
59
+ print(f"❌ Error converting Excel to CSV: {e}")
60
+ return None
61
+
62
+
63
+ def convert_specific_sheet_to_csv(excel_path, sheet_name, output_dir=None):
64
+ """
65
+ Convert a specific sheet of an Excel file to a CSV file.
66
+ """
67
+ if output_dir is None:
68
+ output_dir = os.path.dirname(excel_path)
69
+
70
+ df = pd.read_excel(excel_path, sheet_name=sheet_name)
71
+ safe_filename = "".join(c for c in sheet_name if c.isalnum() or c in (' ', '-', '_')).rstrip()
72
+ safe_filename = safe_filename.replace(' ', '_')
73
+ csv_filename = f"{safe_filename}.csv"
74
+ csv_path = os.path.join(output_dir, csv_filename)
75
+ df.to_csv(csv_path, index=False, encoding='utf-8')
76
+ print(f"✅ {sheet_name} → {csv_filename}")
77
+
78
+ return csv_path
79
+
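+
+ # main() below calls analyze_excel_structure(), which is not defined in this module.
+ # The following is a minimal sketch (assumption: it only needs to report sheet names
+ # and sizes, returning None on failure) so that main() can run end to end.
+ def analyze_excel_structure(excel_path):
+ """Report the sheets of an Excel workbook with their row/column counts."""
+ try:
+ excel_file = pd.ExcelFile(excel_path)
+ sheet_info = {}
+ for sheet_name in excel_file.sheet_names:
+ df = pd.read_excel(excel_path, sheet_name=sheet_name)
+ sheet_info[sheet_name] = {"rows": len(df), "columns": len(df.columns)}
+ print(f"📄 '{sheet_name}': {len(df)} rows, {len(df.columns)} columns")
+ return sheet_info
+ except Exception as e:
+ print(f"❌ Error analyzing Excel structure: {e}")
+ return None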
80
+ def main():
81
+ """Main function to analyze and convert Excel file"""
82
+
83
+ # Define paths
84
+ excel_path = "data/real_data_excel/AI Project document.xlsx"
85
+ output_dir = "data/real_data_excel/converted_csv"
86
+
87
+ # Check if Excel file exists
88
+ if not os.path.exists(excel_path):
89
+ print(f"❌ Excel file not found: {excel_path}")
90
+ return
91
+
92
+ print("=" * 60)
93
+ print("📊 EXCEL TO CSV CONVERTER")
94
+ print("=" * 60)
95
+
96
+ # Step 1: Analyze Excel structure
97
+ sheet_info = analyze_excel_structure(excel_path)
98
+
99
+ if sheet_info is None:
100
+ return
101
+
102
+ # Step 2: Convert to CSV
103
+ converted_files = convert_excel_to_csv(excel_path, output_dir)
104
+
105
+ if converted_files:
106
+ print("\n📂 Converted files:")
107
+ for file_path in converted_files:
108
+ print(f" - {file_path}")
109
+
110
+ if __name__ == "__main__":
111
+ main()
src/preprocess/extract.py ADDED
@@ -0,0 +1,194 @@
1
+ import pandas as pd
2
+ import datetime
3
+ from datetime import date, timedelta
4
+ import json
5
+ import os
6
+ import yaml
7
+ from pathlib import Path
8
+
9
+ # Load paths configuration
10
+ _config_dir = Path(__file__).parent.parent / "config"
11
+ _paths_file = _config_dir / "paths.yaml"
12
+ with open(_paths_file, 'r', encoding='utf-8') as f:
13
+ PATHS = yaml.safe_load(f)
14
+
15
+
16
+ def read_kit_line_match_data() -> pd.DataFrame:
17
+ """Read kit composition and relation data"""
18
+ path = PATHS['data']['csv']['kit_composition']
19
+ return pd.read_csv(path)
20
+
21
+
22
+ def read_employee_data() -> pd.DataFrame:
23
+ """Read employee workforce hourly pay scale data"""
24
+ path = PATHS['data']['csv']['workforce_pay_scale']
25
+ return pd.read_csv(path)
26
+
27
+ def get_shift_info() -> pd.DataFrame:
28
+ """Read work shift information"""
29
+ path = PATHS['data']['csv']['work_shift']
30
+ df = pd.read_csv(path)
31
+ return df
32
+
33
+
34
+ def read_shift_cost_data() -> pd.DataFrame:
35
+ """Read shift cost data from workforce pay scale"""
36
+ path = PATHS['data']['csv']['workforce_pay_scale']
37
+ return pd.read_csv(path)
38
+
39
+
40
+ def read_work_center_capacity() -> pd.DataFrame:
41
+ """Read work center capacity data"""
42
+ path = PATHS['data']['csv']['work_center_capacity']
43
+ return pd.read_csv(path)
44
+
45
+
46
+ def read_material_master() -> pd.DataFrame:
47
+ """Read material master WMS data"""
48
+ path = PATHS['data']['csv']['material_master']
49
+ return pd.read_csv(path)
50
+
51
+ def read_packaging_line_data() -> pd.DataFrame:
52
+ """Read packaging line data (filtered work center capacity)"""
53
+ path = PATHS['data']['csv']['work_center_capacity_processed']
54
+ df = pd.read_csv(path)
55
+ # Filter for packaging lines only
56
+ df = df[df["line_for_packaging"] == True]
57
+ return df
58
+
59
+
60
+ def read_orders_data(
61
+ start_date=None,
62
+ # end_date=None,
63
+ ) -> pd.DataFrame:
64
+ """
65
+ Read COOIS Released Production Orders data
66
+
67
+ Args:
68
+ start_date: start date (pd.Timestamp or datetime)
69
+
70
+ Returns:
71
+ pd.DataFrame: filtered dataframe by date
72
+ """
73
+ path = PATHS['data']['csv']['demand']
74
+ df = pd.read_csv(path)
75
+ assert len(df) > 0, "No data found in the file"
76
+ # convert date column to datetime
77
+ df["Basic start date"] = pd.to_datetime(df["Basic start date"])
78
+
79
+
80
+ # filter by date
81
+ if start_date is not None: # Filter for exact start date only
82
+ df = df[df["Basic start date"] == pd.to_datetime(start_date)]
83
+ else:
84
+ raise ValueError("start_date is required")
85
+
86
+ return df
87
+
88
+
89
+ def read_package_speed_data():
90
+ """Read package speed data from Kits Calculation"""
91
+ path = PATHS['data']['csv']['kits_calculation']
92
+ df = pd.read_csv(path, usecols=["Kit", "Kit per day","Paid work hours per day"])
93
+ df["Kit per day"] = df["Kit per day"].astype(float)
94
+ df["Paid work hours per day"] = df["Paid work hours per day"].astype(float)
95
+ df["Kit"] = df["Kit"].astype(str)
96
+ df['kits_per_hour'] = df['Kit per day']/df['Paid work hours per day']
97
+ speeds_per_hour = dict(zip(df["Kit"], df["kits_per_hour"]))
98
+ return speeds_per_hour
99
+
100
+ def read_personnel_requirement_data():
101
+ """Read personnel requirement data from Kits Calculation"""
102
+ path = PATHS['data']['csv']['kits_calculation']
103
+ df = pd.read_csv(path, usecols=["Kit", "Humanizer", "UNICEF staff"])
104
+
105
+ # Clean the data by handling special whitespace characters like \xa0 (non-breaking space)
106
+ def clean_and_convert_to_float(value):
107
+ if pd.isna(value):
108
+ return 0.0
109
+
110
+ # Convert to string and strip all kinds of whitespace (including \xa0)
111
+ clean_value = str(value).strip()
112
+
113
+ # If empty after stripping, return 0
114
+ if clean_value == '' or clean_value == 'nan':
115
+ return 0.0
116
+
117
+ try:
118
+ return float(clean_value)
119
+ except ValueError as e:
120
+ print(f"Warning: Could not convert '{repr(value)}' to float, setting to 0. Error: {e}")
121
+ return 0.0
122
+
123
+ df["Humanizer"] = df["Humanizer"].apply(clean_and_convert_to_float)
124
+ df["UNICEF staff"] = df["UNICEF staff"].apply(clean_and_convert_to_float)
125
+ df["Kit"] = df["Kit"].astype(str)
126
+
127
+ return df
128
+
129
+
130
+ def get_production_order_data():
131
+ """
132
+ Extract production order information from hierarchy.
133
+
134
+ Returns:
135
+ tuple: (kit_levels, dependencies, priority_order)
136
+ - kit_levels: {kit_id: level} where level 0=prepack, 1=subkit, 2=master
137
+ - dependencies: {kit_id: [dependency_list]}
138
+ - priority_order: [kit_ids] sorted by production priority
139
+ """
140
+ path = PATHS['data']['hierarchy']['kit_hierarchy']
141
+ with open(path, 'r', encoding='utf-8') as f:
142
+ hierarchy = json.load(f)
143
+
144
+ kit_levels = {}
145
+ dependencies = {}
146
+
147
+ # Process hierarchy to extract levels and dependencies
148
+ for master_id, master_data in hierarchy.items():
149
+ # Master kits are level 2
150
+ kit_levels[master_id] = 2
151
+ dependencies[master_id] = master_data.get('dependencies', [])
152
+
153
+ # Process subkits (level 1)
154
+ for subkit_id, subkit_data in master_data.get('subkits', {}).items():
155
+ kit_levels[subkit_id] = 1
156
+ dependencies[subkit_id] = subkit_data.get('dependencies', [])
157
+
158
+ # Process prepacks under subkits (level 0)
159
+ for prepack_id in subkit_data.get('prepacks', []):
160
+ if prepack_id not in kit_levels: # Avoid overwriting if already exists
161
+ kit_levels[prepack_id] = 0
162
+ dependencies[prepack_id] = []
163
+
164
+ # Process direct prepacks under master (level 0)
165
+ for prepack_id in master_data.get('direct_prepacks', []):
166
+ if prepack_id not in kit_levels: # Avoid overwriting if already exists
167
+ kit_levels[prepack_id] = 0
168
+ dependencies[prepack_id] = []
169
+
170
+ # Create priority order: prepacks first, then subkits, then masters
171
+ priority_order = []
172
+
173
+ # Level 0: Prepacks (highest priority)
174
+ prepacks = [kit for kit, level in kit_levels.items() if level == 0]
175
+ priority_order.extend(sorted(prepacks))
176
+
177
+ # Level 1: Subkits (medium priority)
178
+ subkits = [kit for kit, level in kit_levels.items() if level == 1]
179
+ priority_order.extend(sorted(subkits))
180
+
181
+ # Level 2: Masters (lowest priority)
182
+ masters = [kit for kit, level in kit_levels.items() if level == 2]
183
+ priority_order.extend(sorted(masters))
184
+
185
+ return kit_levels, dependencies, priority_order
186
+
187
+
188
+
189
+ if __name__ == "__main__":
190
+ employee_data = read_employee_data()
191
+ print("employee data")
192
+ print(employee_data)
193
+ print("line speed data",read_package_speed_data())
194
+
src/preprocess/hierarchy_parser.py ADDED
@@ -0,0 +1,219 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Kit Hierarchy Parser - Converts CSV hierarchy data to optimized formats
4
+
5
+ This module provides functions to:
6
+ 1. Parse Kit_Composition_and_relation.csv
7
+ 2. Generate JSON hierarchy structure
8
+ 3. Create production order CSV
9
+ 4. Build DAG for optimization constraints
10
+ """
11
+
12
+ import pandas as pd
13
+ import json
14
+ from typing import Dict, List, Tuple, Set
15
+ from collections import defaultdict, deque
16
+
17
+
18
+ class KitHierarchyParser:
19
+ """
20
+ Parses kit composition data and creates hierarchy structures
21
+ for production order optimization.
22
+ """
23
+
24
+ def __init__(self, csv_path: str = "data/real_data_excel/converted_csv/Kit_Composition_and_relation.csv"):
25
+ self.csv_path = csv_path
26
+ self.df = None
27
+ self.hierarchy_json = {}
28
+ self.production_order_csv = []
29
+ self.dependency_graph = {'nodes': set(), 'edges': set()}
30
+
31
+ def load_data(self):
32
+ """Load and clean the CSV data"""
33
+ self.df = pd.read_csv(self.csv_path)
34
+ print(f"Loaded {len(self.df)} rows from {self.csv_path}")
35
+
36
+ def parse_hierarchy(self) -> Dict:
37
+ """
38
+ Parse the hierarchy from CSV into JSON structure
39
+ Returns: Nested dictionary representing the hierarchy
40
+ """
41
+ if self.df is None:
42
+ self.load_data()
43
+
44
+ # Get unique relationships
45
+ relationships = self.df[['Master Kit', 'Master Kit Description',
46
+ 'Sub kit', 'Sub kit description',
47
+ 'Prepack', 'Prepack Description']].drop_duplicates()
48
+
49
+ hierarchy = defaultdict(lambda: {
50
+ 'name': '',
51
+ 'type': 'master',
52
+ 'subkits': defaultdict(lambda: {
53
+ 'name': '',
54
+ 'type': 'subkit',
55
+ 'prepacks': [],
56
+ 'dependencies': []
57
+ }),
58
+ 'dependencies': []
59
+ })
60
+
61
+ for _, row in relationships.iterrows():
62
+ master_id = row['Master Kit']
63
+ master_desc = row['Master Kit Description']
64
+ subkit_id = row['Sub kit']
65
+ subkit_desc = row['Sub kit description']
66
+ prepack_id = row['Prepack']
67
+ prepack_desc = row['Prepack Description']
68
+
69
+ if pd.notna(master_id):
70
+ # Set master info
71
+ hierarchy[master_id]['name'] = master_desc if pd.notna(master_desc) else ''
72
+
73
+ if pd.notna(subkit_id):
74
+ # Set subkit info
75
+ hierarchy[master_id]['subkits'][subkit_id]['name'] = subkit_desc if pd.notna(subkit_desc) else ''
76
+
77
+ # Add subkit to master dependencies
78
+ if subkit_id not in hierarchy[master_id]['dependencies']:
79
+ hierarchy[master_id]['dependencies'].append(subkit_id)
80
+
81
+ if pd.notna(prepack_id):
82
+ # Set prepack info
83
+ if prepack_id not in hierarchy[master_id]['subkits'][subkit_id]['prepacks']:
84
+ hierarchy[master_id]['subkits'][subkit_id]['prepacks'].append(prepack_id)
85
+
86
+ # Add prepack to subkit dependencies
87
+ if prepack_id not in hierarchy[master_id]['subkits'][subkit_id]['dependencies']:
88
+ hierarchy[master_id]['subkits'][subkit_id]['dependencies'].append(prepack_id)
89
+
90
+ elif pd.notna(prepack_id):
91
+ # Handle direct master-prepack relationship (no subkit)
92
+ # Add direct_prepacks list to hierarchy if it doesn't exist
93
+ if 'direct_prepacks' not in hierarchy[master_id]:
94
+ hierarchy[master_id]['direct_prepacks'] = []
95
+
96
+ # Add prepack directly to master
97
+ if prepack_id not in hierarchy[master_id]['direct_prepacks']:
98
+ hierarchy[master_id]['direct_prepacks'].append(prepack_id)
99
+
100
+ # Add prepack to master dependencies
101
+ if prepack_id not in hierarchy[master_id]['dependencies']:
102
+ hierarchy[master_id]['dependencies'].append(prepack_id)
103
+
104
+ # Convert defaultdict to regular dict for JSON serialization
105
+ self.hierarchy_json = json.loads(json.dumps(hierarchy, default=dict))
106
+ return self.hierarchy_json
107
+
108
+
109
+
110
+ def sort_products_by_hierarchy(product_list: List[str],
111
+ kit_levels: Dict[str, int],
112
+ kit_dependencies: Dict[str, List[str]]) -> List[str]:
113
+ """
114
+ Sort products by hierarchy levels and dependencies using topological sorting.
115
+ Returns products in optimal production order: prepacks → subkits → masters
116
+ Dependencies within the same level are properly ordered.
117
+
118
+ Args:
119
+ product_list: List of product names to sort
120
+ kit_levels: Dictionary mapping product names to hierarchy levels (0=prepack, 1=subkit, 2=master)
121
+ kit_dependencies: Dictionary mapping product names to their dependencies (products that must be made first)
122
+
123
+ Returns:
124
+ List of products sorted in production order (dependencies first)
125
+ """
126
+ # Filter products that are in our production list and have hierarchy data
127
+ products_with_hierarchy = [p for p in product_list if p in kit_levels]
128
+ products_without_hierarchy = [p for p in product_list if p not in kit_levels]
129
+
130
+ if products_without_hierarchy:
131
+ print(f"[HIERARCHY] Products without hierarchy data: {products_without_hierarchy}")
132
+
133
+ # Build dependency graph for products in our list
134
+ graph = defaultdict(list) # product -> [dependents]
135
+ in_degree = defaultdict(int) # product -> number of dependencies
136
+
137
+ # Initialize all products
138
+ for product in products_with_hierarchy:
139
+ in_degree[product] = 0
140
+
141
+ for product in products_with_hierarchy:
142
+ deps = kit_dependencies.get(product, [])  # dependencies = products that have to be packed first
143
+ for dep in deps:
144
+ if dep in products_with_hierarchy: # Only if dependency is in our production list
145
+ # REVERSE THE RELATIONSHIP:
146
+ # kit_dependencies says: "product needs dep"
147
+ # graph says: "dep is needed by product"
148
+ graph[dep].append(product) # dep -> product (reverse the relationship!)
149
+ in_degree[product] += 1
150
+
151
+ # Topological sort with hierarchy level priority
152
+ sorted_products = []
153
+ # deque allows O(1) removal from both ends
154
+ queue = deque()
155
+
156
+ # Start with products that have no dependencies
157
+ for product in products_with_hierarchy:
158
+ if in_degree[product] == 0:
159
+ queue.append(product)
160
+
161
+ while queue:
162
+ current = queue.popleft()
163
+ sorted_products.append(current)
164
+
165
+ # Process dependents - sort by hierarchy level first
166
+ for dependent in sorted(graph[current], key=lambda p: (kit_levels.get(p, 999), p)):
167
+ in_degree[dependent] -= 1  # one fewer unmet dependency
168
+ if in_degree[dependent] == 0:  # all of its dependencies are scheduled, so it is ready to process
169
+ queue.append(dependent)
170
+
171
+ # Check for cycles (shouldn't happen with proper hierarchy)
172
+ if len(sorted_products) != len(products_with_hierarchy):
173
+ remaining = [p for p in products_with_hierarchy if p not in sorted_products]
174
+ print(f"[HIERARCHY] WARNING: Potential circular dependencies detected in: {remaining}")
175
+ # Add remaining products sorted by level as fallback
176
+ remaining_sorted = sorted(remaining, key=lambda p: (kit_levels.get(p, 999), p))
177
+ sorted_products.extend(remaining_sorted)
178
+
179
+ # Add products without hierarchy information at the end
180
+ sorted_products.extend(sorted(products_without_hierarchy))
181
+
182
+ print(f"[HIERARCHY] Dependency-aware production order: {len(sorted_products)} products")
183
+ for i, p in enumerate(sorted_products[:10]): # Show first 10
184
+ level = kit_levels.get(p, "unknown")
185
+ # Import here to avoid circular dependency
186
+ try:
187
+ from src.config.constants import KitLevel
188
+ level_name = KitLevel.get_name(level)
189
+ except ImportError:
190
+ level_name = f"level_{level}"
191
+ deps = kit_dependencies.get(p, [])
192
+ deps_in_list = [d for d in deps if d in products_with_hierarchy]
193
+ print(f" {i+1}. {p} (level {level}={level_name}, deps: {len(deps_in_list)})")
194
+ if deps_in_list:
195
+ print(f" Dependencies: {deps_in_list}")
196
+
197
+ if len(sorted_products) > 10:
198
+ print(f" ... and {len(sorted_products) - 10} more products")
199
+
200
+ return sorted_products
201
+
202
+
203
+ def main():
204
+ """Demo the hierarchy parser"""
205
+ parser = KitHierarchyParser()
206
+
207
+ print("🔄 Parsing kit hierarchy...")
208
+ hierarchy = parser.parse_hierarchy()
209
+
210
+ # Export hierarchy to JSON
211
+ with open('data/hierarchy_exports/kit_hierarchy.json', 'w') as f:
212
+ json.dump(hierarchy, f, indent=4)
213
+
214
+ print(f"📊 Found {len(hierarchy)} master kits")
215
+
216
+
217
+
218
+ if __name__ == "__main__":
219
+ main()
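
A minimal usage sketch of the parser and the dependency-aware sort defined above. The import path and the example kit names, levels, and dependency map are assumptions for illustration only; in the real pipeline the levels and dependencies come from the optimization config and the exported kit_hierarchy.json.

    from src.preprocess.kit_hierarchy_parser import (  # module path assumed
        KitHierarchyParser,
        sort_products_by_hierarchy,
    )

    # Parse the raw composition CSV into the nested hierarchy structure
    parser = KitHierarchyParser("data/real_data_excel/converted_csv/Kit_Composition_and_relation.csv")
    hierarchy = parser.parse_hierarchy()

    # Order an illustrative product list so that dependencies are packed first
    kit_levels = {"PREPACK_C": 0, "SUBKIT_B": 1, "MASTER_A": 2}  # 0=prepack, 1=subkit, 2=master
    kit_dependencies = {"SUBKIT_B": ["PREPACK_C"], "MASTER_A": ["SUBKIT_B"]}
    order = sort_products_by_hierarchy(["MASTER_A", "SUBKIT_B", "PREPACK_C"], kit_levels, kit_dependencies)
    # -> ['PREPACK_C', 'SUBKIT_B', 'MASTER_A']
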
src/preprocess/kit_composition_cleaner.py ADDED
@@ -0,0 +1,259 @@
1
+ """
2
+ Kit Composition Data Cleaner
3
+
4
+ This script converts the Kit_Composition_and_relation.csv file into a cleaned format
5
+ with line types according to the following rules:
6
+
7
+ 1. Master Kits:
8
+ - If it has no subkits or prepacks (standalone master): line_type = "long line"
9
+ - If it has subkits or prepacks: line_type = "" (empty/theoretical, no direct production)
10
+
11
+ 2. Sub Kits:
12
+ - All sub kits get line_type = "long line"
13
+
14
+ 3. Prepacks:
15
+ - All prepacks get line_type = "miniload"
16
+
17
+ The output includes columns: kit_name, kit_description, kit_type, line_type
18
+ """
19
+
20
+ import pandas as pd
21
+ import os
22
+ from typing import Tuple
23
+
24
+
25
+ class KitCompositionCleaner:
26
+ """
27
+ Cleans and processes kit composition data with line type assignments.
28
+
29
+ This class maintains state across processing steps, allowing for:
30
+ - Single data load
31
+ - Step-by-step processing
32
+ - Intermediate result storage
33
+ """
34
+
35
+ def __init__(self, input_file: str, output_file: str = None):
36
+ """
37
+ Initialize the cleaner with file paths.
38
+
39
+ Args:
40
+ input_file: Path to input CSV file (Kit_Composition_and_relation.csv)
41
+ output_file: Path to output CSV file (optional, can be set later)
42
+ """
43
+ self.input_file = input_file
44
+ self.output_file = output_file
45
+
46
+ # State variables for processing pipeline
47
+ self.df = None
48
+ self.master_df = None
49
+ self.subkit_df = None
50
+ self.prepack_df = None
51
+ self.final_df = None
52
+
53
+ def load_data(self) -> pd.DataFrame:
54
+ """Load the Kit Composition and relation CSV file."""
55
+ if not os.path.exists(self.input_file):
56
+ raise FileNotFoundError(f"File not found: {self.input_file}")
57
+
58
+ self.df = pd.read_csv(self.input_file)
59
+ print(f"Loaded {len(self.df)} rows from {self.input_file}")
60
+ return self.df
61
+
62
+ def process_master_kits(self) -> pd.DataFrame:
63
+ """
64
+ Process Master Kits according to business rules:
65
+ - Standalone masters (no subkits/prepacks, only components): line_type = "long line"
66
+ - Non-standalone masters (have subkits/prepacks): line_type = "" (empty - no production needed)
67
+ """
68
+ if self.df is None:
69
+ raise ValueError("Data not loaded. Call load_data() first.")
70
+
71
+ print("Processing Master Kits...")
72
+
73
+ # Identify masters with hierarchy (subkits or prepacks)
74
+ masters_with_subkits = set(self.df[self.df['Sub kit'].notna()]['Master Kit'].unique())
75
+ masters_with_prepacks = set(self.df[self.df['Prepack'].notna()]['Master Kit'].unique())
76
+ masters_with_hierarchy = masters_with_subkits.union(masters_with_prepacks)
77
+
78
+ # All masters
79
+ all_masters = set(self.df['Master Kit'].unique())
80
+
81
+ # Standalone masters are those WITHOUT subkits/prepacks (only have components)
82
+ standalone_masters = all_masters - masters_with_hierarchy
83
+
84
+ print(f"Total unique Master Kits: {len(all_masters)}")
85
+ print(f"Masters with subkits/prepacks: {len(masters_with_hierarchy)}")
86
+ print(f"Standalone masters (only components): {len(standalone_masters)}")
87
+
88
+ # Create master kit records
89
+ master_data = []
90
+
91
+ # Get unique master kits with descriptions
92
+ unique_masters = self.df[['Master Kit', 'Master Kit Description']].drop_duplicates()
93
+
94
+ for _, row in unique_masters.iterrows():
95
+ master_kit = row['Master Kit']
96
+ master_desc = row['Master Kit Description']
97
+
98
+ # Determine line_type based on standalone status
99
+ if master_kit in standalone_masters:
100
+ line_type = "long line"
101
+ else:
102
+ line_type = "" # Empty for non-standalone (theoretical)
103
+
104
+ master_data.append({
105
+ 'kit_name': master_kit,
106
+ 'kit_description': master_desc,
107
+ 'kit_type': 'master',
108
+ 'line_type': line_type
109
+ })
110
+
111
+ self.master_df = pd.DataFrame(master_data)
112
+
113
+
114
+ return self.master_df
115
+
116
+ def process_sub_kits(self) -> pd.DataFrame:
117
+ """
118
+ Process Sub Kits according to business rules:
119
+ - All sub kits get line_type = "long line"
120
+ - Remove duplicates
121
+ """
122
+ if self.df is None:
123
+ raise ValueError("Data not loaded. Call load_data() first.")
124
+
125
+ print("Processing Sub Kits...")
126
+
127
+ # Filter rows that have sub kits
128
+ subkit_df = self.df[self.df['Sub kit'].notna()].copy()
129
+
130
+ if len(subkit_df) == 0:
131
+ print("No sub kits found")
132
+ self.subkit_df = pd.DataFrame(columns=['kit_name', 'kit_description', 'kit_type', 'line_type'])
133
+ return self.subkit_df
134
+
135
+ # Get unique sub kits with descriptions
136
+ unique_subkits = subkit_df[['Sub kit', 'Sub kit description']].drop_duplicates()
137
+
138
+ subkit_data = []
139
+ for _, row in unique_subkits.iterrows():
140
+ subkit_data.append({
141
+ 'kit_name': row['Sub kit'],
142
+ 'kit_description': row['Sub kit description'],
143
+ 'kit_type': 'subkit',
144
+ 'line_type': 'long line'
145
+ })
146
+
147
+ self.subkit_df = pd.DataFrame(subkit_data)
148
+ print(f"Created {len(self.subkit_df)} sub kit records")
149
+
150
+ return self.subkit_df
151
+
152
+ def process_prepacks(self) -> pd.DataFrame:
153
+ """
154
+ Process Prepacks according to business rules:
155
+ - All prepacks get line_type = "miniload"
156
+ - Remove duplicates
157
+ """
158
+ if self.df is None:
159
+ raise ValueError("Data not loaded. Call load_data() first.")
160
+
161
+ print("Processing Prepacks...")
162
+
163
+ # Filter rows that have prepacks
164
+ prepack_df = self.df[self.df['Prepack'].notna()].copy()
165
+
166
+ if len(prepack_df) == 0:
167
+ print("No prepacks found")
168
+ self.prepack_df = pd.DataFrame(columns=['kit_name', 'kit_description', 'kit_type', 'line_type'])
169
+ return self.prepack_df
170
+
171
+ # Get unique prepacks with descriptions
172
+ unique_prepacks = prepack_df[['Prepack', 'Prepack Description']].drop_duplicates()
173
+
174
+ prepack_data = []
175
+ for _, row in unique_prepacks.iterrows():
176
+ prepack_data.append({
177
+ 'kit_name': row['Prepack'],
178
+ 'kit_description': row['Prepack Description'],
179
+ 'kit_type': 'prepack',
180
+ 'line_type': 'miniload'
181
+ })
182
+
183
+ self.prepack_df = pd.DataFrame(prepack_data)
184
+ print(f"Created {len(self.prepack_df)} prepack records")
185
+
186
+ return self.prepack_df
187
+
188
+ def concatenate_and_save(self, output_path: str = None) -> pd.DataFrame:
189
+ """
190
+ Concatenate all processed dataframes and save to output file.
191
+
192
+ Args:
193
+ output_path: Path to save the output file (uses self.output_file if not provided)
194
+ """
195
+ if self.master_df is None or self.subkit_df is None or self.prepack_df is None:
196
+ raise ValueError("Processing not complete. Run process_master_kits(), process_sub_kits(), and process_prepacks() first.")
197
+
198
+ print("Concatenating results...")
199
+
200
+ # Concatenate all dataframes
201
+ self.final_df = pd.concat([self.master_df, self.subkit_df, self.prepack_df], ignore_index=True)
202
+
203
+ # Ensure empty strings instead of NaN for line_type
204
+ self.final_df['line_type'] = self.final_df['line_type'].fillna('')
205
+
206
+ # Sort by kit_type for better organization
207
+ self.final_df = self.final_df.sort_values(['kit_type', 'kit_name']).reset_index(drop=True)
208
+
209
+ print(f"Final dataset contains {len(self.final_df)} records:")
210
+ print(f" - Masters: {len(self.master_df)}")
211
+ print(f" - Subkits: {len(self.subkit_df)}")
212
+ print(f" - Prepacks: {len(self.prepack_df)}")
213
+
214
+ # Determine output path
215
+ save_path = output_path or self.output_file
216
+ if save_path is None:
217
+ raise ValueError("No output path provided. Specify output_path parameter or set self.output_file")
218
+
219
+ # Save to file (keep empty strings as empty, not NaN)
220
+ self.final_df.to_csv(save_path, index=False, na_rep='')
221
+ print(f"Saved cleaned data to: {save_path}")
222
+
223
+ return self.final_df
224
+
225
+
226
+ def main():
227
+ """Main function to execute the kit composition cleaning process."""
228
+ # Define file paths
229
+ base_dir = os.getcwd()  # assumes the script is run from the project root (replaces a machine-specific absolute path)
230
+ input_file = os.path.join(base_dir, "data/real_data_excel/converted_csv/Kit_Composition_and_relation.csv")
231
+ output_file = os.path.join(base_dir, "data/real_data_excel/converted_csv/Kit_Composition_and_relation_cleaned_with_line_type.csv")
232
+
233
+ try:
234
+ # Initialize cleaner with class
235
+ cleaner = KitCompositionCleaner(input_file, output_file)
236
+
237
+ # Execute pipeline step by step
238
+ cleaner.load_data()
239
+ cleaner.process_master_kits()
240
+ cleaner.process_sub_kits()
241
+ cleaner.process_prepacks()
242
+ final_df = cleaner.concatenate_and_save()
243
+
244
+ # Display summary statistics
245
+ print("Line type distribution:")
246
+ print(final_df['line_type'].value_counts(dropna=False))
247
+ print("\nKit type distribution:")
248
+ print(final_df['kit_type'].value_counts())
249
+
250
+ print("\nSample of final data:")
251
+ print(final_df.head(10))
252
+
253
+ except Exception as e:
254
+ print(f"❌ Error processing kit composition data: {e}")
255
+ raise
256
+
257
+
258
+ if __name__ == "__main__":
259
+ main()
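
For reference, a sketch of driving the cleaner with project-relative paths rather than the absolute path hard-coded in main(); the file locations simply mirror the defaults used above.

    from src.preprocess.kit_composition_cleaner import KitCompositionCleaner

    cleaner = KitCompositionCleaner(
        input_file="data/real_data_excel/converted_csv/Kit_Composition_and_relation.csv",
        output_file="data/real_data_excel/converted_csv/Kit_Composition_and_relation_cleaned_with_line_type.csv",
    )
    cleaner.load_data()
    cleaner.process_master_kits()
    cleaner.process_sub_kits()
    cleaner.process_prepacks()
    cleaned = cleaner.concatenate_and_save()
    print(cleaned["line_type"].value_counts(dropna=False))
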
src/preprocess/transform.py ADDED
@@ -0,0 +1,79 @@
1
+ import pandas as pd
2
+ import src.preprocess.extract as ex
3
+
4
+
5
+ def get_product_list(start_date=None):
6
+ """
7
+ Get unique product list from demand data
8
+
9
+ Args:
10
+ start_date: start date to filter data. Required.
11
+ """
12
+ demand = ex.read_orders_data(start_date=start_date)
13
+ print(demand["Material Number"].unique())
14
+ return demand["Material Number"].unique()
15
+
16
+
17
+ def get_employee_list():
18
+ employee = ex.read_employee_data()
19
+ employee = employee["Description"]  # assumes read_employee_data() returns a mapping of table/sheet name -> DataFrame
20
+
21
+ return employee["Employee_Type"].unique()
22
+
23
+
24
+ def get_released_product_list(start_date=None):
25
+ """
26
+ get released product list from COOIS_Released_Prod_Orders.csv
27
+
28
+ Args:
29
+ start_date: start date to filter data. Required.
30
+ """
31
+ released_orders = ex.read_orders_data(
32
+ start_date=start_date,
33
+ )
34
+ product_list = released_orders["Material Number"].unique().tolist()
35
+ print(f"Released products for date range {start_date}: {len(product_list)} products")
36
+ return product_list
37
+
38
+
39
+ def get_available_dates():
40
+ """
41
+ Get all available dates from COOIS_Released_Prod_Orders.csv
42
+
43
+ Returns:
44
+ tuple: (start_dates, end_dates) - unique start dates and end dates list
45
+ """
46
+ released_orders = ex.read_orders_data()
47
+
48
+ released_orders["Basic start date"] = pd.to_datetime(released_orders["Basic start date"])
49
+ released_orders["Basic finish date"] = pd.to_datetime(released_orders["Basic finish date"])
50
+
51
+ start_dates = sorted(released_orders["Basic start date"].dt.date.unique())
52
+ end_dates = sorted(released_orders["Basic finish date"].dt.date.unique())
53
+
54
+ all_dates = sorted(set(start_dates + end_dates))
55
+
56
+ return all_dates, start_dates, end_dates
57
+
58
+
59
+ def get_date_ranges():
60
+ """
61
+ get available (start_date, end_date) combinations
62
+ Returns:
63
+ list : available (start_date, end_date) combinations
64
+ """
65
+ released_orders = ex.read_orders_data()
66
+
67
+ released_orders["Basic start date"] = pd.to_datetime(released_orders["Basic start date"])
68
+ released_orders["Basic finish date"] = pd.to_datetime(released_orders["Basic finish date"])
69
+
70
+ date_ranges = released_orders[["Basic start date", "Basic finish date"]].drop_duplicates()
71
+ date_ranges["start_date"] = date_ranges["Basic start date"].dt.date
72
+ date_ranges["end_date"] = date_ranges["Basic finish date"].dt.date
73
+
74
+ ranges = [(row["start_date"], row["end_date"]) for _, row in date_ranges.iterrows()]
75
+ ranges = sorted(set(ranges))
76
+
77
+ return ranges
78
+
79
+
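
A short sketch of how these helpers chain together to pick a date range and pull its released products; it assumes src.preprocess.extract.read_orders_data can locate the released-orders CSV without extra arguments.

    from src.preprocess import transform

    ranges = transform.get_date_ranges()            # unique (start_date, end_date) pairs
    if ranges:
        start_date, _end_date = ranges[0]
        products = transform.get_released_product_list(start_date=start_date)
        print(f"{len(products)} products to schedule from {start_date}")
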
src/visualization/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """
2
+ Visualization package for Supply Roster Optimization Tool
3
+ Provides visualization dashboards and charts for optimization results
4
+ """
5
+
src/visualization/hierarchy_dashboard.py ADDED
@@ -0,0 +1,554 @@
1
+ """
2
+ Hierarchy-Based Production Flow Visualization
3
+ Shows how kits flow through production based on dependency hierarchy
4
+ """
5
+
6
+ import streamlit as st
7
+ import pandas as pd
8
+ import plotly.express as px
9
+ import plotly.graph_objects as go
10
+ from plotly.subplots import make_subplots
11
+ try:
12
+ import networkx as nx
13
+ NETWORKX_AVAILABLE = True
14
+ except ImportError:
15
+ NETWORKX_AVAILABLE = False
16
+ nx = None
17
+
18
+ import numpy as np
19
+ import sys
20
+
21
+ from src.config.optimization_config import (
22
+ KIT_LEVELS, KIT_DEPENDENCIES, TEAM_REQ_PER_PRODUCT,
23
+ shift_code_to_name, line_code_to_name
24
+ )
25
+ from src.config.constants import ShiftType, LineType, KitLevel
26
+
27
+ # Import kit relationships dashboard
28
+ try:
29
+ from src.visualization.kit_relationships import display_kit_relationships_dashboard
30
+ except ImportError:
31
+ display_kit_relationships_dashboard = None
32
+
33
+ def display_hierarchy_operations_dashboard(results):
34
+ """Enhanced operations dashboard showing hierarchy-based production flow"""
35
+ st.header("🏭 Hierarchy-Based Operations Dashboard")
36
+ st.markdown("---")
37
+
38
+ # Create main dashboard tabs
39
+ tab1, tab2, tab3 = st.tabs([
40
+ "🔄 Production Flow",
41
+ "📊 Hierarchy Analytics",
42
+ "🔗 Kit Relationships"
43
+ ])
44
+
45
+ with tab1:
46
+ display_production_flow_visualization(results)
47
+
48
+ with tab2:
49
+ display_hierarchy_analytics(results)
50
+
51
+ with tab3:
52
+ # Kit relationships from actual hierarchy data
53
+ if display_kit_relationships_dashboard:
54
+ display_kit_relationships_dashboard(results)
55
+ else:
56
+ st.error("Kit relationships dashboard not available. Please check installation.")
57
+
58
+ def display_production_flow_visualization(results):
59
+ """Show how products flow through production lines by hierarchy"""
60
+ st.subheader("🔄 Kit Production Flow by Hierarchy")
61
+
62
+ # Get production sequence data
63
+ flow_data = prepare_hierarchy_flow_data(results)
64
+
65
+ if not flow_data:
66
+ st.warning("No production data available for flow visualization")
67
+ return
68
+
69
+ # Create flow diagram
70
+
71
+
72
+
73
+ # Hierarchy level summary - horizontal layout
74
+ st.subheader("📦 Production by Level")
75
+ level_summary = get_hierarchy_level_summary(flow_data)
76
+
77
+ # Create horizontal columns for each level
78
+ level_names = ['prepack', 'subkit', 'master']
79
+ available_levels = [level for level in level_names if level in level_summary]
80
+
81
+ if available_levels:
82
+ cols = st.columns(len(available_levels))
83
+
84
+ for i, level_name in enumerate(available_levels):
85
+ data = level_summary[level_name]
86
+ with cols[i]:
87
+ # Use custom styling instead of st.metric to avoid delta arrows
88
+ st.markdown(f"""
89
+ <div style="
90
+ background: linear-gradient(135deg, #f0f8ff, #e6f3ff);
91
+ padding: 1rem;
92
+ border-radius: 0.5rem;
93
+ text-align: center;
94
+ border-left: 4px solid {'#90EE90' if level_name == 'prepack' else '#FFD700' if level_name == 'subkit' else '#FF6347'};
95
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
96
+ ">
97
+ <div style="font-size: 0.8rem; color: #666; text-transform: uppercase; letter-spacing: 1px;">
98
+ {level_name.title()} Kits
99
+ </div>
100
+ <div style="font-size: 1.5rem; font-weight: bold; color: #333; margin: 0.2rem 0;">
101
+ {data['count']} products
102
+ </div>
103
+ <div style="font-size: 1rem; color: #555;">
104
+ {data['total_units']:,.0f} units
105
+ </div>
106
+ </div>
107
+ """, unsafe_allow_html=True)
108
+
109
+ # Timeline view of hierarchy production
110
+ st.subheader("📅 Hierarchy Production Timeline")
111
+ try:
112
+ fig_timeline = create_hierarchy_timeline(flow_data)
113
+ st.plotly_chart(fig_timeline, use_container_width=True)
114
+ except Exception as e:
115
+ st.warning(f"Timeline chart temporarily unavailable ({e}). Showing alternative visualization.")
116
+ # Fallback: Simple bar chart by day
117
+ if flow_data:
118
+ df_simple = pd.DataFrame([{
119
+ 'Day': f"Day {row['day']}",
120
+ 'Level': row['level_name'].title(),
121
+ 'Units': row['units'],
122
+ 'Product': row['product']
123
+ } for row in flow_data])
124
+
125
+ fig_simple = px.bar(df_simple, x='Day', y='Units', color='Level',
126
+ title='Production Volume by Day and Hierarchy Level',
127
+ color_discrete_map={
128
+ 'Prepack': '#90EE90',
129
+ 'Subkit': '#FFD700',
130
+ 'Master': '#FF6347'
131
+ })
132
+ st.plotly_chart(fig_simple, use_container_width=True)
133
+
134
+ def display_hierarchy_analytics(results):
135
+ """Deep dive analytics on hierarchy production performance"""
136
+ st.subheader("📊 Hierarchy Performance Analytics")
137
+
138
+ # Prepare analytics data
139
+ analytics_data = prepare_hierarchy_analytics_data(results)
140
+
141
+ if not analytics_data:
142
+ st.warning("No hierarchy data available for analytics")
143
+ return
144
+
145
+ # Key metrics
146
+ col1, col2, col3, col4 = st.columns(4)
147
+
148
+ with col1:
149
+ prepack_efficiency = analytics_data.get('prepack_efficiency', 0)
150
+ st.metric("Prepack Efficiency", f"{prepack_efficiency:.1f}%",
151
+ delta=f"{prepack_efficiency-95:.1f}%" if prepack_efficiency != 95 else None)
152
+
153
+ with col2:
154
+ dependency_violations = analytics_data.get('dependency_violations', 0)
155
+ st.metric("Dependency Violations", f"{dependency_violations}",
156
+ delta=f"-{dependency_violations}" if dependency_violations > 0 else None)
157
+
158
+ with col3:
159
+ avg_lead_time = analytics_data.get('avg_lead_time', 0)
160
+ st.metric("Avg Lead Time", f"{avg_lead_time:.1f} days")
161
+
162
+ with col4:
163
+ hierarchy_cost_efficiency = analytics_data.get('cost_efficiency', 0)
164
+ st.metric("Cost Efficiency", f"€{hierarchy_cost_efficiency:.2f}/unit")
165
+
166
+ # Dependency flow chart
167
+ st.subheader("🔗 Dependency Network Analysis")
168
+ fig_network = create_dependency_network_chart(analytics_data)
169
+ st.plotly_chart(fig_network, use_container_width=True)
170
+
171
+ # Production heatmap
172
+ st.subheader("🔥 Hierarchy Production Heatmap")
173
+ heatmap_fig = create_hierarchy_heatmap(results)
174
+ st.plotly_chart(heatmap_fig, use_container_width=True)
175
+
176
+
177
+
178
+ # Removed display_enhanced_line_utilization function - utilization concept removed
179
+
180
+ def display_production_sequence_analysis(results):
181
+ """Analyze production sequence and timing"""
182
+ st.subheader("🎯 Production Sequence Analysis")
183
+
184
+ # NOTE: `sequence_data` was referenced below without being defined; its source is assumed
+ # here to be a (hypothetical) 'sequence_analysis' dict attached to the optimization results.
+ sequence_data = results.get('sequence_analysis', {})
185
+ if not sequence_data:
186
+ st.warning("No sequence data available")
187
+ return
188
+
189
+ # Sequence adherence metrics
190
+ col1, col2, col3 = st.columns(3)
191
+
192
+ with col1:
193
+ sequence_score = sequence_data.get('sequence_adherence_score', 0)
194
+ st.metric("Sequence Adherence", f"{sequence_score:.1f}%",
195
+ help="How well production follows optimal hierarchy sequence")
196
+
197
+ with col2:
198
+ early_productions = sequence_data.get('early_productions', 0)
199
+ st.metric("Early Productions", f"{early_productions}",
200
+ help="Products produced before their dependencies")
201
+
202
+ with col3:
203
+ optimal_sequences = sequence_data.get('optimal_sequences', 0)
204
+ st.metric("Optimal Sequences", f"{optimal_sequences}%",
205
+ help="Percentage of products following optimal sequence")
206
+
207
+ # Sequence violation chart
208
+ if sequence_data.get('violations'):
209
+ st.subheader("⚠️ Sequence Violations")
210
+ violations_df = pd.DataFrame(sequence_data['violations'])
211
+
212
+ fig = px.scatter(violations_df,
213
+ x='production_day', y='dependency_day',
214
+ color='severity', size='impact',
215
+ hover_data=['product', 'dependency'],
216
+ title='Production vs Dependency Timing (Violations in Red)',
217
+ labels={'production_day': 'When Product Was Made',
218
+ 'dependency_day': 'When Dependency Was Made'})
219
+
220
+ # Add diagonal line (should be above this line)
221
+ max_day = max(violations_df['production_day'].max(), violations_df['dependency_day'].max())
222
+ fig.add_shape(type="line", x0=0, y0=0, x1=max_day, y1=max_day,
223
+ line=dict(dash="dash", color="gray"),
224
+ name="Ideal Sequence Line")
225
+
226
+ st.plotly_chart(fig, use_container_width=True)
227
+
228
+ # Sequence optimization suggestions
229
+ st.subheader("💡 Optimization Suggestions")
230
+ suggestions = generate_sequence_suggestions(sequence_data)
231
+ for suggestion in suggestions:
232
+ st.info(f"💡 {suggestion}")
233
+
234
+ # Helper Functions
235
+
236
+ def prepare_hierarchy_flow_data(results):
237
+ """Prepare data for hierarchy flow visualization"""
238
+ flow_data = []
239
+
240
+ for row in results['run_schedule']:
241
+ product = row['product']
242
+ level = KIT_LEVELS.get(product, KitLevel.MASTER)
243
+ level_name = KitLevel.get_name(level)
244
+
245
+ flow_data.append({
246
+ 'product': product,
247
+ 'level': level,
248
+ 'level_name': level_name,
249
+ 'day': row['day'],
250
+ 'shift': row['shift'],
251
+ 'line_type': row['line_type_id'],
252
+ 'line_idx': row['line_idx'],
253
+ 'hours': row['run_hours'],
254
+ 'units': row['units'],
255
+ 'dependencies': KIT_DEPENDENCIES.get(product, [])
256
+ })
257
+
258
+ return flow_data
259
+
260
+ def create_hierarchy_timeline(flow_data):
261
+ """Create timeline showing hierarchy production sequence"""
262
+ if not flow_data:
263
+ return go.Figure()
264
+
265
+ # Prepare timeline data with proper datetime conversion
266
+ timeline_data = []
267
+
268
+ from datetime import datetime, timedelta
269
+ base_date = datetime(2025, 1, 1) # Base date for timeline
270
+
271
+ for row in flow_data:
272
+ shift_name = ShiftType.get_name(row['shift'])
273
+ line_name = LineType.get_name(row['line_type'])
274
+
275
+ # Create start and end times for the production run
276
+ start_date = base_date + timedelta(days=row['day']-1)
277
+ end_date = start_date + timedelta(hours=row['hours'])
278
+
279
+ timeline_data.append({
280
+ 'Product': row['product'],
281
+ 'Level': row['level_name'].title(),
282
+ 'Start': start_date,
283
+ 'End': end_date,
284
+ 'Day': f"Day {row['day']}",
285
+ 'Shift': shift_name,
286
+ 'Line': f"{line_name} {row['line_idx']}",
287
+ 'Units': row['units'],
288
+ 'Hours': row['hours'],
289
+ 'Priority': row['level'] # For sorting
290
+ })
291
+
292
+ df = pd.DataFrame(timeline_data)
293
+
294
+ if df.empty:
295
+ return go.Figure()
296
+
297
+ # Create timeline chart with proper datetime columns
298
+ fig = px.timeline(df,
299
+ x_start='Start', x_end='End',
300
+ y='Line',
301
+ color='Level',
302
+ hover_data=['Product', 'Units', 'Hours', 'Shift', 'Day'],
303
+ title='Production Timeline by Hierarchy Level',
304
+ color_discrete_map={
305
+ 'Prepack': '#90EE90',
306
+ 'Subkit': '#FFD700',
307
+ 'Master': '#FF6347'
308
+ })
309
+
310
+ fig.update_layout(
311
+ height=500,
312
+ xaxis_title='Production Timeline',
313
+ yaxis_title='Production Line'
314
+ )
315
+
316
+ return fig
317
+
318
+ def prepare_hierarchy_analytics_data(results):
319
+ """Prepare analytics data for hierarchy performance"""
320
+ analytics = {
321
+ 'prepack_efficiency': 0,
322
+ 'dependency_violations': 0,
323
+ 'avg_lead_time': 0,
324
+ 'cost_efficiency': 0,
325
+ 'violations': [],
326
+ 'dependencies': KIT_DEPENDENCIES
327
+ }
328
+
329
+ # Calculate metrics
330
+ total_cost = results.get('objective', 0)
331
+ total_units = sum(results.get('weekly_production', {}).values())
332
+
333
+ if total_units > 0:
334
+ analytics['cost_efficiency'] = total_cost / total_units
335
+
336
+ # Analyze dependency violations
337
+ production_times = {}
338
+ for row in results['run_schedule']:
339
+ product = row['product']
340
+ day = row['day']
341
+ if product not in production_times or day < production_times[product]:
342
+ production_times[product] = day
343
+
344
+ violations = 0
345
+ violation_details = []
346
+
347
+ for product, prod_day in production_times.items():
348
+ dependencies = KIT_DEPENDENCIES.get(product, [])
349
+ for dep in dependencies:
350
+ if dep in production_times:
351
+ dep_day = production_times[dep]
352
+ if dep_day > prod_day: # Dependency produced after product
353
+ violations += 1
354
+ violation_details.append({
355
+ 'product': product,
356
+ 'dependency': dep,
357
+ 'production_day': prod_day,
358
+ 'dependency_day': dep_day,
359
+ 'severity': 'high' if dep_day - prod_day > 1 else 'medium',
360
+ 'impact': abs(dep_day - prod_day)
361
+ })
362
+
363
+ analytics['dependency_violations'] = violations
364
+ analytics['violations'] = violation_details
365
+
366
+ return analytics
367
+
368
+ # Removed calculate_hierarchy_line_utilization and create_utilization_gauge functions
369
+ # - utilization concept removed from dashboard
370
+
371
+ def create_hierarchy_heatmap(results):
372
+ """Create heatmap showing hierarchy production by line and day"""
373
+ # Prepare heatmap data
374
+ heatmap_data = []
375
+
376
+ for row in results['run_schedule']:
377
+ product = row['product']
378
+ level_name = KitLevel.get_name(KIT_LEVELS.get(product, KitLevel.MASTER))
379
+ line_name = f"{LineType.get_name(row['line_type_id'])} {row['line_idx']}"
380
+
381
+ heatmap_data.append({
382
+ 'Line': line_name,
383
+ 'Day': f"Day {row['day']}",
384
+ 'Level': level_name,
385
+ 'Units': row['units'],
386
+ 'Hours': row['run_hours']
387
+ })
388
+
389
+ if not heatmap_data:
390
+ return go.Figure()
391
+
392
+ df = pd.DataFrame(heatmap_data)
393
+
394
+ # Pivot for heatmap
395
+ pivot_df = df.pivot_table(
396
+ values='Units',
397
+ index='Line',
398
+ columns='Day',
399
+ aggfunc='sum',
400
+ fill_value=0
401
+ )
402
+
403
+ fig = px.imshow(pivot_df.values,
404
+ x=pivot_df.columns,
405
+ y=pivot_df.index,
406
+ color_continuous_scale='Blues',
407
+ title='Production Volume Heatmap (Units per Day)',
408
+ labels=dict(x="Day", y="Production Line", color="Units"))
409
+
410
+ return fig
411
+
412
+ def create_dependency_network_chart(analytics_data):
413
+ """Create network chart showing dependency relationships"""
414
+ dependencies = analytics_data.get('dependencies', {})
415
+
416
+ if not dependencies or not NETWORKX_AVAILABLE:
417
+ return go.Figure().add_annotation(
418
+ text="Dependency network visualization requires 'networkx' package. Install with: pip install networkx" if not NETWORKX_AVAILABLE else "No dependency relationships to display",
419
+ xref="paper", yref="paper",
420
+ x=0.5, y=0.5, showarrow=False
421
+ )
422
+
423
+ # Create network graph
424
+ G = nx.DiGraph()
425
+
426
+ # Add nodes and edges
427
+ for product, deps in dependencies.items():
428
+ if product and deps: # Only if product has dependencies
429
+ G.add_node(product)
430
+ for dep in deps:
431
+ if dep: # Only if dependency exists
432
+ G.add_node(dep)
433
+ G.add_edge(dep, product) # Dependency -> Product
434
+
435
+ if len(G.nodes()) == 0:
436
+ return go.Figure().add_annotation(
437
+ text="No dependency relationships to display",
438
+ xref="paper", yref="paper",
439
+ x=0.5, y=0.5, showarrow=False
440
+ )
441
+
442
+ # Calculate layout
443
+ pos = nx.spring_layout(G, k=3, iterations=50)
444
+
445
+ # Create edge traces
446
+ edge_x = []
447
+ edge_y = []
448
+ for edge in G.edges():
449
+ x0, y0 = pos[edge[0]]
450
+ x1, y1 = pos[edge[1]]
451
+ edge_x.extend([x0, x1, None])
452
+ edge_y.extend([y0, y1, None])
453
+
454
+ edge_trace = go.Scatter(x=edge_x, y=edge_y,
455
+ line=dict(width=0.5, color='#888'),
456
+ hoverinfo='none',
457
+ mode='lines')
458
+
459
+ # Create node traces
460
+ node_x = []
461
+ node_y = []
462
+ node_text = []
463
+ node_color = []
464
+
465
+ for node in G.nodes():
466
+ x, y = pos[node]
467
+ node_x.append(x)
468
+ node_y.append(y)
469
+ node_text.append(node)
470
+
471
+ # Color by hierarchy level
472
+ level = KIT_LEVELS.get(node, KitLevel.MASTER)
473
+ if level == KitLevel.PREPACK:
474
+ node_color.append('#90EE90')
475
+ elif level == KitLevel.SUBKIT:
476
+ node_color.append('#FFD700')
477
+ else:
478
+ node_color.append('#FF6347')
479
+
480
+ node_trace = go.Scatter(x=node_x, y=node_y,
481
+ mode='markers+text',
482
+ text=node_text,
483
+ textposition='middle center',
484
+ marker=dict(size=20, color=node_color, line=dict(width=2, color='black')),
485
+ hoverinfo='text',
486
+ hovertext=node_text)
487
+
488
+ fig = go.Figure(data=[edge_trace, node_trace],
489
+ layout=go.Layout(
490
+ title='Kit Dependency Network',
491
+ title_font_size=16,
492
+ showlegend=False,
493
+ hovermode='closest',
494
+ margin=dict(b=20,l=5,r=5,t=40),
495
+ annotations=[ dict(
496
+ text="Green=Prepack, Gold=Subkit, Red=Master",
497
+ showarrow=False,
498
+ xref="paper", yref="paper",
499
+ x=0.005, y=-0.002,
500
+ xanchor='left', yanchor='bottom',
501
+ font=dict(size=12)
502
+ )],
503
+ xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
504
+ yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))
505
+
506
+ return fig
507
+
508
+
509
+
510
+
511
+
512
+ def generate_sequence_suggestions(sequence_data):
513
+ """Generate optimization suggestions based on sequence analysis"""
514
+ suggestions = []
515
+
516
+ adherence = sequence_data.get('sequence_adherence_score', 0)
517
+ violations = sequence_data.get('early_productions', 0)
518
+
519
+ if adherence < 80:
520
+ suggestions.append(
521
+ "Consider adjusting production sequence to better follow hierarchy dependencies. "
522
+ "Current adherence is below optimal (80%)."
523
+ )
524
+
525
+ if violations > 0:
526
+ suggestions.append(
527
+ f"Found {violations} dependency violations. Review production scheduling to ensure "
528
+ "prepacks are produced before subkits, and subkits before masters."
529
+ )
530
+
531
+ if adherence >= 95:
532
+ suggestions.append(
533
+ "Excellent sequence adherence! Production is following optimal hierarchy flow."
534
+ )
535
+
536
+ if not suggestions:
537
+ suggestions.append("Production sequence analysis complete. No major issues detected.")
538
+
539
+ return suggestions
540
+
541
+ def get_hierarchy_level_summary(flow_data):
542
+ """Get summary statistics for each hierarchy level"""
543
+ summary = {}
544
+
545
+ for level_name in ['prepack', 'subkit', 'master']:
546
+ level_products = [row for row in flow_data if row['level_name'] == level_name]
547
+
548
+ summary[level_name] = {
549
+ 'count': len(set(row['product'] for row in level_products)),
550
+ 'total_units': sum(row['units'] for row in level_products),
551
+ 'total_hours': sum(row['hours'] for row in level_products)
552
+ }
553
+
554
+ return summary
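
All of the dashboard functions above consume a `results` dictionary produced by the optimizer. Below is a sketch of the minimal shape they expect, inferred from how prepare_hierarchy_flow_data() and prepare_hierarchy_analytics_data() index each run_schedule row; the concrete values are invented.

    example_results = {
        "objective": 12500.0,                     # total cost, used for cost efficiency
        "weekly_production": {"PREPACK_C": 400},  # units per product
        "run_schedule": [
            {
                "product": "PREPACK_C",
                "day": 1,            # planning day (1-based)
                "shift": 1,          # ShiftType code
                "line_type_id": 7,   # LineType code (7 = Mini Load)
                "line_idx": 1,
                "run_hours": 6.5,
                "units": 400,
            },
        ],
    }
    # display_hierarchy_operations_dashboard(example_results)  # call from within a Streamlit app
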
src/visualization/kit_relationships.py ADDED
@@ -0,0 +1,629 @@
1
+ """
2
+ Kit Relationship Visualization
3
+ Shows the actual dependency relationships between kits in production
4
+ based on kit_hierarchy.json data
5
+ """
6
+
7
+ import streamlit as st
8
+ import pandas as pd
9
+ import plotly.express as px
10
+ import plotly.graph_objects as go
11
+ from plotly.subplots import make_subplots
12
+ import json
13
+ import sys
14
+
15
+ from src.config.constants import ShiftType, LineType, KitLevel
16
+
17
+ # Optional networkx for advanced network layouts
18
+ try:
19
+ import networkx as nx
20
+ NETWORKX_AVAILABLE = True
21
+ except ImportError:
22
+ NETWORKX_AVAILABLE = False
23
+ nx = None
24
+
25
+ def load_kit_hierarchy():
26
+ """Load kit hierarchy data from JSON file"""
27
+ try:
28
+ with open('data/hierarchy_exports/kit_hierarchy.json', 'r') as f:
29
+ return json.load(f)
30
+ except FileNotFoundError:
31
+ st.error("Kit hierarchy file not found. Please ensure kit_hierarchy.json exists in data/hierarchy_exports/")
32
+ return {}
33
+ except json.JSONDecodeError:
34
+ st.error("Invalid kit hierarchy JSON format")
35
+ return {}
36
+
37
+ def display_kit_relationships_dashboard(results):
38
+ """Main dashboard showing kit relationships in production"""
39
+ st.header("🔗 Kit Relationship Dashboard")
40
+ st.markdown("Visualizing dependencies between kits being produced")
41
+ st.markdown("---")
42
+
43
+ # Load hierarchy data
44
+ hierarchy_data = load_kit_hierarchy()
45
+
46
+ if not hierarchy_data:
47
+ st.warning("No kit hierarchy data available")
48
+ return
49
+
50
+ # Get produced kits from results
51
+ produced_kits = set()
52
+ if 'weekly_production' in results:
53
+ produced_kits = set(results['weekly_production'].keys())
54
+ elif 'run_schedule' in results:
55
+ produced_kits = set(row['product'] for row in results['run_schedule'])
56
+
57
+ if not produced_kits:
58
+ st.warning("No production data available")
59
+ return
60
+
61
+ # Create tabs for different relationship views
62
+ tab1, tab2, tab3, tab4 = st.tabs([
63
+ "🌐 Dependency Network",
64
+ "📊 Relationship Matrix",
65
+ "🎯 Production Flow",
66
+ "⚠️ Dependency Analysis"
67
+ ])
68
+
69
+ with tab1:
70
+ display_dependency_network(hierarchy_data, produced_kits, results)
71
+
72
+ with tab2:
73
+ display_relationship_matrix(hierarchy_data, produced_kits, results)
74
+
75
+ with tab3:
76
+ display_production_flow_relationships(hierarchy_data, produced_kits, results)
77
+
78
+ with tab4:
79
+ display_dependency_analysis(hierarchy_data, produced_kits, results)
80
+
81
+ def display_dependency_network(hierarchy_data, produced_kits, results):
82
+ """Show interactive network graph of kit dependencies"""
83
+ st.subheader("🌐 Kit Dependency Network")
84
+ st.markdown("Interactive graph showing which kits depend on other kits")
85
+
86
+ # Build relationship data for produced kits only
87
+ relationships = build_relationship_data(hierarchy_data, produced_kits)
88
+
89
+ if not relationships:
90
+ st.info("No dependency relationships found between produced kits")
91
+ return
92
+
93
+ # Get production timing data
94
+ production_timing = get_production_timing(results)
95
+
96
+ # Create network visualization
97
+ col1, col2 = st.columns([3, 1])
98
+
99
+ with col1:
100
+ if NETWORKX_AVAILABLE:
101
+ fig = create_interactive_network_graph(relationships, production_timing)
102
+ st.plotly_chart(fig, use_container_width=True)
103
+ else:
104
+ fig = create_simple_dependency_chart(relationships, production_timing)
105
+ st.plotly_chart(fig, use_container_width=True)
106
+ st.info("💡 Install networkx for advanced network layouts: `pip install networkx`")
107
+
108
+ with col2:
109
+ # Network statistics
110
+ st.subheader("📈 Network Stats")
111
+
112
+ all_kits = set()
113
+ for rel in relationships:
114
+ all_kits.add(rel['source'])
115
+ all_kits.add(rel['target'])
116
+
117
+ st.metric("Total Kits", len(all_kits))
118
+ st.metric("Dependencies", len(relationships))
119
+
120
+ # Dependency depth analysis
121
+ max_depth = calculate_dependency_depth(relationships)
122
+ st.metric("Max Dependency Depth", max_depth)
123
+
124
+ # Most dependent kits
125
+ dependent_kits = get_most_dependent_kits(relationships)
126
+ st.subheader("🔗 Most Dependencies")
127
+ for kit, count in dependent_kits[:5]:
128
+ st.write(f"**{kit}**: {count} dependencies")
129
+
130
+ def display_relationship_matrix(hierarchy_data, produced_kits, results):
131
+ """Show dependency matrix heatmap"""
132
+ st.subheader("📊 Kit Dependency Matrix")
133
+ st.markdown("Heatmap showing which kits (rows) depend on which other kits (columns)")
134
+
135
+ # Build dependency matrix
136
+ matrix_data = build_dependency_matrix(hierarchy_data, produced_kits)
137
+
138
+ if matrix_data.empty:
139
+ st.info("No dependency relationships to visualize in matrix form")
140
+ return
141
+
142
+ # Create heatmap
143
+ fig = px.imshow(matrix_data.values,
144
+ x=matrix_data.columns,
145
+ y=matrix_data.index,
146
+ color_continuous_scale='Blues',
147
+ title='Kit Dependency Matrix (1 = depends on, 0 = no dependency)',
148
+ labels=dict(x="Dependency (what is needed)",
149
+ y="Kit (what depends on others)",
150
+ color="Dependency"))
151
+
152
+ fig.update_layout(height=600)
153
+ st.plotly_chart(fig, use_container_width=True)
154
+
155
+ # Show matrix as table
156
+ with st.expander("📋 View Dependency Matrix as Table"):
157
+ st.dataframe(matrix_data, use_container_width=True)
158
+
159
+ def display_production_flow_relationships(hierarchy_data, produced_kits, results):
160
+ """Show how relationships affect production timing"""
161
+ st.subheader("🎯 Production Flow with Relationships")
162
+ st.markdown("Timeline showing when dependent kits are produced")
163
+
164
+ # Get production timing and relationships
165
+ production_timing = get_production_timing(results)
166
+ relationships = build_relationship_data(hierarchy_data, produced_kits)
167
+
168
+ if not production_timing or not relationships:
169
+ st.info("Insufficient data for production flow analysis")
170
+ return
171
+
172
+ # Create timeline with dependency arrows
173
+ fig = create_production_timeline_with_dependencies(production_timing, relationships)
174
+ st.plotly_chart(fig, use_container_width=True)
175
+
176
+ # Timing analysis table
177
+ st.subheader("⏰ Dependency Timing Analysis")
178
+ timing_analysis = analyze_dependency_timing(production_timing, relationships)
179
+
180
+ if timing_analysis:
181
+ df = pd.DataFrame(timing_analysis)
182
+ st.dataframe(df, use_container_width=True)
183
+
184
+ def display_dependency_analysis(hierarchy_data, produced_kits, results):
185
+ """Analyze dependency fulfillment and violations"""
186
+ st.subheader("⚠️ Dependency Analysis & Violations")
187
+
188
+ production_timing = get_production_timing(results)
189
+ relationships = build_relationship_data(hierarchy_data, produced_kits)
190
+
191
+ # Analyze violations
192
+ violations = find_dependency_violations(production_timing, relationships)
193
+
194
+ # Summary metrics
195
+ col1, col2, col3, col4 = st.columns(4)
196
+
197
+ with col1:
198
+ total_deps = len(relationships)
199
+ st.metric("Total Dependencies", total_deps)
200
+
201
+ with col2:
202
+ violated_deps = len(violations)
203
+ st.metric("Violations", violated_deps,
204
+ delta=f"-{violated_deps}" if violated_deps > 0 else None)
205
+
206
+ with col3:
207
+ if total_deps > 0:
208
+ success_rate = ((total_deps - violated_deps) / total_deps) * 100
209
+ st.metric("Success Rate", f"{success_rate:.1f}%")
210
+ else:
211
+ st.metric("Success Rate", "N/A")
212
+
213
+ with col4:
214
+ if violations:
215
+ avg_violation = sum(v['days_early'] for v in violations) / len(violations)
216
+ st.metric("Avg Days Early", f"{avg_violation:.1f}")
217
+ else:
218
+ st.metric("Avg Days Early", "0")
219
+
220
+ # Violation details
221
+ if violations:
222
+ st.subheader("🚨 Dependency Violations")
223
+ st.markdown("Cases where kits were produced before their dependencies")
224
+
225
+ violation_df = pd.DataFrame(violations)
226
+
227
+ # Violation severity chart
228
+ fig = px.scatter(violation_df,
229
+ x='dependency_day', y='kit_day',
230
+ size='days_early', color='severity',
231
+ hover_data=['kit', 'dependency'],
232
+ title='Dependency Violations (Below diagonal = violation)',
233
+ labels={'dependency_day': 'When Dependency Was Made',
234
+ 'kit_day': 'When Kit Was Made'})
235
+
236
+ # Add diagonal line showing ideal timing
237
+ max_day = max(violation_df['dependency_day'].max(), violation_df['kit_day'].max())
238
+ fig.add_shape(type="line", x0=0, y0=0, x1=max_day, y1=max_day,
239
+ line=dict(dash="dash", color="green"),
240
+ name="Ideal Timeline")
241
+
242
+ st.plotly_chart(fig, use_container_width=True)
243
+
244
+ # Detailed violation table
245
+ st.dataframe(violation_df[['kit', 'dependency', 'kit_day', 'dependency_day',
246
+ 'days_early', 'severity']], use_container_width=True)
247
+ else:
248
+ st.success("🎉 No dependency violations found! All kits produced in correct order.")
249
+
250
+ # Recommendations
251
+ st.subheader("💡 Recommendations")
252
+ recommendations = generate_dependency_recommendations(violations, relationships, production_timing)
253
+ for rec in recommendations:
254
+ st.info(f"💡 {rec}")
255
+
256
+ # Helper Functions
257
+
258
+ def build_relationship_data(hierarchy_data, produced_kits):
259
+ """Build relationship data for visualization"""
260
+ relationships = []
261
+
262
+ for kit_id, kit_info in hierarchy_data.items():
263
+ if kit_id not in produced_kits:
264
+ continue
265
+
266
+ # Add direct dependencies
267
+ dependencies = kit_info.get('dependencies', [])
268
+ for dep in dependencies:
269
+ if dep in produced_kits: # Only show relationships between produced kits
270
+ relationships.append({
271
+ 'source': dep, # Dependency (what's needed)
272
+ 'target': kit_id, # Kit that depends on it
273
+ 'type': 'direct',
274
+ 'source_type': hierarchy_data.get(dep, {}).get('type', 'unknown'),
275
+ 'target_type': kit_info.get('type', 'unknown')
276
+ })
277
+
278
+ return relationships
279
+
280
+ def build_dependency_matrix(hierarchy_data, produced_kits):
281
+ """Build dependency matrix for heatmap"""
282
+ produced_list = sorted(list(produced_kits))
283
+
284
+ if len(produced_list) == 0:
285
+ return pd.DataFrame()
286
+
287
+ # Initialize matrix
288
+ matrix = pd.DataFrame(0, index=produced_list, columns=produced_list)
289
+
290
+ # Fill matrix with dependencies
291
+ for kit_id in produced_list:
292
+ kit_info = hierarchy_data.get(kit_id, {})
293
+ dependencies = kit_info.get('dependencies', [])
294
+
295
+ for dep in dependencies:
296
+ if dep in produced_list:
297
+ matrix.loc[kit_id, dep] = 1 # kit_id depends on dep
298
+
299
+ return matrix
300
+
301
+ def get_production_timing(results):
302
+ """Extract production timing for each kit"""
303
+ timing = {}
304
+
305
+ if 'run_schedule' in results:
306
+ for run in results['run_schedule']:
307
+ kit = run['product']
308
+ day = run['day']
309
+
310
+ # Use earliest day if kit is produced multiple times
311
+ if kit not in timing or day < timing[kit]:
312
+ timing[kit] = day
313
+
314
+ return timing
315
+
316
+ def create_interactive_network_graph(relationships, production_timing):
317
+ """Create interactive network graph using NetworkX layout"""
318
+ if not NETWORKX_AVAILABLE:
319
+ return create_simple_dependency_chart(relationships, production_timing)
320
+
321
+ # Create NetworkX graph
322
+ G = nx.DiGraph()
323
+
324
+ # Add edges (relationships)
325
+ for rel in relationships:
326
+ G.add_edge(rel['source'], rel['target'], type=rel['type'])
327
+
328
+ if len(G.nodes()) == 0:
329
+ return go.Figure().add_annotation(
330
+ text="No relationships to display",
331
+ xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False
332
+ )
333
+
334
+ # Calculate layout
335
+ pos = nx.spring_layout(G, k=3, iterations=50)
336
+
337
+ # Create edge traces
338
+ edge_x, edge_y = [], []
339
+ edge_info = []
340
+
341
+ for edge in G.edges():
342
+ source, target = edge
343
+ x0, y0 = pos[source]
344
+ x1, y1 = pos[target]
345
+
346
+ edge_x.extend([x0, x1, None])
347
+ edge_y.extend([y0, y1, None])
348
+
349
+ # Add arrow annotation
350
+ edge_info.append({
351
+ 'x': (x0 + x1) / 2,
352
+ 'y': (y0 + y1) / 2,
353
+ 'text': '→',
354
+ 'source': source,
355
+ 'target': target
356
+ })
357
+
358
+ edge_trace = go.Scatter(x=edge_x, y=edge_y,
359
+ line=dict(width=2, color='#888'),
360
+ hoverinfo='none',
361
+ mode='lines')
362
+
363
+ # Create node traces
364
+ node_x, node_y, node_text, node_color, node_size = [], [], [], [], []
365
+ node_info = []
366
+
367
+ for node in G.nodes():
368
+ x, y = pos[node]
369
+ node_x.append(x)
370
+ node_y.append(y)
371
+
372
+ # Node size based on number of connections
373
+ in_degree = G.in_degree(node)
374
+ out_degree = G.out_degree(node)
375
+ total_degree = in_degree + out_degree
376
+ node_size.append(20 + total_degree * 5)
377
+
378
+ # Color by production timing
379
+ prod_day = production_timing.get(node, 0)
380
+ if prod_day == 1:
381
+ node_color.append('#90EE90') # Light green for early
382
+ elif prod_day <= 3:
383
+ node_color.append('#FFD700') # Gold for middle
384
+ else:
385
+ node_color.append('#FF6347') # Tomato for late
386
+
387
+ # Node text and info
388
+ short_name = node[:12] + "..." if len(node) > 12 else node
389
+ node_text.append(short_name)
390
+
391
+ node_info.append(f"{node}<br>Day: {prod_day}<br>In: {in_degree}, Out: {out_degree}")
392
+
393
+ node_trace = go.Scatter(x=node_x, y=node_y,
394
+ mode='markers+text',
395
+ text=node_text,
396
+ textposition='middle center',
397
+ hovertext=node_info,
398
+ hoverinfo='text',
399
+ marker=dict(size=node_size,
400
+ color=node_color,
401
+ line=dict(width=2, color='black')))
402
+
403
+ # Create figure
404
+ fig = go.Figure(data=[edge_trace, node_trace],
405
+ layout=go.Layout(
406
+ title='Kit Dependency Network (Size=Connections, Color=Production Day)',
407
+ showlegend=False,
408
+ hovermode='closest',
409
+ margin=dict(b=20,l=5,r=5,t=40),
410
+ annotations=[
411
+ dict(text="Green=Early, Gold=Middle, Red=Late production",
412
+ showarrow=False,
413
+ xref="paper", yref="paper",
414
+ x=0.005, y=-0.002,
415
+ xanchor='left', yanchor='bottom',
416
+ font=dict(size=12))
417
+ ],
418
+ xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
419
+ yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))
420
+
421
+ return fig
422
+
423
+ def create_simple_dependency_chart(relationships, production_timing):
424
+ """Create simple dependency chart without NetworkX"""
425
+ if not relationships:
426
+ return go.Figure().add_annotation(
427
+ text="No dependencies to display",
428
+ xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False
429
+ )
430
+
431
+ # Create a simple directed graph visualization
432
+ # Group kits by their role (sources, targets)
433
+ sources = set(rel['source'] for rel in relationships)
434
+ targets = set(rel['target'] for rel in relationships)
435
+
436
+ # Create positions
437
+ all_kits = list(sources | targets)
438
+ positions = {kit: (i, production_timing.get(kit, 0)) for i, kit in enumerate(all_kits)}
439
+
440
+ # Create traces
441
+ edge_x, edge_y = [], []
442
+ for rel in relationships:
443
+ source_pos = positions[rel['source']]
444
+ target_pos = positions[rel['target']]
445
+
446
+ edge_x.extend([source_pos[0], target_pos[0], None])
447
+ edge_y.extend([source_pos[1], target_pos[1], None])
448
+
449
+ # Edge trace
450
+ edge_trace = go.Scatter(x=edge_x, y=edge_y,
451
+ line=dict(width=2, color='#888'),
452
+ hoverinfo='none',
453
+ mode='lines')
454
+
455
+ # Node trace
456
+ node_x = [positions[kit][0] for kit in all_kits]
457
+ node_y = [positions[kit][1] for kit in all_kits]
458
+ node_text = [kit[:10] + "..." if len(kit) > 10 else kit for kit in all_kits]
459
+
460
+ node_trace = go.Scatter(x=node_x, y=node_y,
461
+ mode='markers+text',
462
+ text=node_text,
463
+ textposition='top center',
464
+ marker=dict(size=15, color='lightblue',
465
+ line=dict(width=2, color='black')),
466
+ hovertext=all_kits,
467
+ hoverinfo='text')
468
+
469
+ fig = go.Figure(data=[edge_trace, node_trace],
470
+ layout=go.Layout(
471
+ title='Kit Dependencies (Y-axis = Production Day)',
472
+ showlegend=False,
473
+ xaxis=dict(title='Kits'),
474
+ yaxis=dict(title='Production Day')))
475
+
476
+ return fig
477
+
478
+ def create_production_timeline_with_dependencies(production_timing, relationships):
479
+ """Create timeline showing production order with dependency arrows"""
480
+ if not production_timing:
481
+ return go.Figure()
482
+
483
+ # Prepare data
484
+ timeline_data = []
485
+ for kit, day in production_timing.items():
486
+ timeline_data.append({
487
+ 'Kit': kit,
488
+ 'Day': day,
489
+ 'Short_Name': kit[:15] + "..." if len(kit) > 15 else kit
490
+ })
491
+
492
+ df = pd.DataFrame(timeline_data)
493
+
494
+ # Create scatter plot
495
+ fig = px.scatter(df, x='Day', y='Kit',
496
+ hover_data=['Kit'],
497
+ title='Production Timeline with Dependencies')
498
+
499
+ # Add dependency arrows
500
+ for rel in relationships:
501
+ source_day = production_timing.get(rel['source'], 0)
502
+ target_day = production_timing.get(rel['target'], 0)
503
+
504
+ # Add arrow if both kits are in timeline
505
+ if source_day > 0 and target_day > 0:
506
+ fig.add_annotation(
507
+ x=target_day, y=rel['target'],
508
+ ax=source_day, ay=rel['source'],
509
+ arrowhead=2, arrowsize=1, arrowwidth=2,
510
+ arrowcolor="red" if source_day > target_day else "green"
511
+ )
512
+
513
+ fig.update_layout(height=max(400, len(df) * 20))
514
+ return fig
515
+
516
+ def calculate_dependency_depth(relationships):
517
+ """Calculate maximum dependency depth"""
518
+ if not NETWORKX_AVAILABLE or not relationships:
519
+ return 0
520
+
521
+ G = nx.DiGraph()
522
+ for rel in relationships:
523
+ G.add_edge(rel['source'], rel['target'])
524
+
525
+ try:
526
+ return nx.dag_longest_path_length(G)
527
+ except nx.NetworkXUnfeasible:  # the dependency graph contains a cycle, so no DAG depth
528
+ return 0
529
+
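+ # Worked example (illustrative kit names): a chain PREPACK_A -> SUBKIT_B -> MASTER_C
+ # has a longest path of two edges, so the depth is 2; a cyclic graph falls back to 0.
+ # calculate_dependency_depth([
+ #     {'source': 'PREPACK_A', 'target': 'SUBKIT_B'},
+ #     {'source': 'SUBKIT_B', 'target': 'MASTER_C'}])   # -> 2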
530
+ def get_most_dependent_kits(relationships):
531
+ """Get kits with most dependencies"""
532
+ dependency_counts = {}
533
+
534
+ for rel in relationships:
535
+ target = rel['target']
536
+ dependency_counts[target] = dependency_counts.get(target, 0) + 1
537
+
538
+ return sorted(dependency_counts.items(), key=lambda x: x[1], reverse=True)
539
+
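+ # Worked example (illustrative data): each relationship counts once against its
+ # target, so a master kit fed by two prepacks is listed first.
+ # get_most_dependent_kits([
+ #     {'source': 'PREPACK_A', 'target': 'MASTER_1'},
+ #     {'source': 'PREPACK_B', 'target': 'MASTER_1'},
+ #     {'source': 'PREPACK_A', 'target': 'MASTER_2'}])
+ # -> [('MASTER_1', 2), ('MASTER_2', 1)]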
540
+ def find_dependency_violations(production_timing, relationships):
541
+ """Find cases where kits were produced before their dependencies"""
542
+ violations = []
543
+
544
+ for rel in relationships:
545
+ source = rel['source'] # dependency
546
+ target = rel['target'] # kit that depends on it
547
+
548
+ source_day = production_timing.get(source, 0)
549
+ target_day = production_timing.get(target, 0)
550
+
551
+ if source_day > 0 and target_day > 0 and source_day > target_day:
552
+ days_early = source_day - target_day
553
+ severity = 'high' if days_early > 2 else 'medium' if days_early > 1 else 'low'
554
+
555
+ violations.append({
556
+ 'kit': target,
557
+ 'dependency': source,
558
+ 'kit_day': target_day,
559
+ 'dependency_day': source_day,
560
+ 'days_early': days_early,
561
+ 'severity': severity
562
+ })
563
+
564
+ return violations
565
+
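+ # Worked example (made-up schedule): MASTER_1 is scheduled on day 2 but its
+ # dependency PREPACK_A only on day 5, so the kit is 3 days early -> severity 'high'.
+ # find_dependency_violations(
+ #     production_timing={'PREPACK_A': 5, 'MASTER_1': 2},
+ #     relationships=[{'source': 'PREPACK_A', 'target': 'MASTER_1'}])
+ # -> [{'kit': 'MASTER_1', 'dependency': 'PREPACK_A', 'kit_day': 2,
+ #      'dependency_day': 5, 'days_early': 3, 'severity': 'high'}]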
566
+ def analyze_dependency_timing(production_timing, relationships):
567
+ """Analyze timing of all dependency relationships"""
568
+ timing_analysis = []
569
+
570
+ for rel in relationships:
571
+ source = rel['source']
572
+ target = rel['target']
573
+
574
+ source_day = production_timing.get(source, 0)
575
+ target_day = production_timing.get(target, 0)
576
+
577
+ if source_day > 0 and target_day > 0:
578
+ timing_diff = target_day - source_day
579
+ status = "✅ Correct" if timing_diff >= 0 else "❌ Violation"
580
+
581
+ timing_analysis.append({
582
+ 'Kit': target[:20] + "..." if len(target) > 20 else target,
583
+ 'Dependency': source[:20] + "..." if len(source) > 20 else source,
584
+ 'Kit Day': target_day,
585
+ 'Dep Day': source_day,
586
+ 'Gap (Days)': timing_diff,
587
+ 'Status': status
588
+ })
589
+
590
+ return sorted(timing_analysis, key=lambda x: x['Gap (Days)'])
591
+
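+ # Worked example (same made-up schedule): the gap is kit day minus dependency day,
+ # so 2 - 5 = -3 and the row is flagged as a violation; gaps >= 0 are correct.
+ # analyze_dependency_timing(
+ #     {'PREPACK_A': 5, 'MASTER_1': 2},
+ #     [{'source': 'PREPACK_A', 'target': 'MASTER_1'}])
+ # -> [{'Kit': 'MASTER_1', 'Dependency': 'PREPACK_A', 'Kit Day': 2,
+ #      'Dep Day': 5, 'Gap (Days)': -3, 'Status': '❌ Violation'}]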
592
+ def generate_dependency_recommendations(violations, relationships, production_timing):
593
+ """Generate recommendations based on dependency analysis"""
594
+ recommendations = []
595
+
596
+ if not violations:
597
+ recommendations.append("Excellent! All dependencies are being fulfilled in the correct order.")
598
+ return recommendations
599
+
600
+ # Group violations by severity
601
+ high_severity = [v for v in violations if v['severity'] == 'high']
602
+ medium_severity = [v for v in violations if v['severity'] == 'medium']
603
+
604
+ if high_severity:
605
+ recommendations.append(
606
+ f"🚨 High Priority: {len(high_severity)} critical dependency violations found. "
607
+ "Consider rescheduling production to ensure dependencies are produced first."
608
+ )
609
+
610
+ if medium_severity:
611
+ recommendations.append(
612
+ f"⚠️ Medium Priority: {len(medium_severity)} moderate dependency timing issues. "
613
+ "Review production sequence for optimization opportunities."
614
+ )
615
+
616
+ # Most problematic kits
617
+ problem_kits = {}
618
+ for v in violations:
619
+ kit = v['kit']
620
+ problem_kits[kit] = problem_kits.get(kit, 0) + 1
621
+
622
+ if problem_kits:
623
+ worst_kit = max(problem_kits.items(), key=lambda x: x[1])
624
+ recommendations.append(
625
+ f"🎯 Focus Area: Kit {worst_kit[0]} has {worst_kit[1]} dependency issues. "
626
+ "Consider moving its production later in the schedule."
627
+ )
628
+
629
+ return recommendations
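+ # End-to-end sketch (illustrative only): the violation list produced above feeds
+ # straight into the recommendation text.
+ # violations = find_dependency_violations(production_timing, relationships)
+ # for line in generate_dependency_recommendations(violations, relationships, production_timing):
+ #     print(line)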