haileyhalimj@gmail.com
Refactor optimization configuration and constants integration
fa2c20f
#!/usr/bin/env python3
"""
Kit Hierarchy Parser - Converts CSV hierarchy data to optimized formats
This module provides functions to:
1. Parse Kit_Composition_and_relation.csv
2. Generate JSON hierarchy structure
3. Create production order CSV
4. Build DAG for optimization constraints
"""
import pandas as pd
import json
from typing import Dict, List, Tuple, Set
from collections import defaultdict, deque
class KitHierarchyParser:
    """
    Turns the kit composition table into a nested
    master kit -> sub kit -> prepack dictionary.
    """

    def __init__(self, csv_path: str = "data/real_data_excel/converted_csv/Kit_Composition_and_relation.csv"):
        # Nothing is read at construction time; the CSV is loaded on demand.
        self.df = None
        self.csv_path = csv_path
        self.hierarchy_json = {}
        # The two attributes below are only initialised in this class;
        # presumably other code fills them in -- TODO confirm.
        self.production_order_csv = []
        self.dependency_graph = {'nodes': set(), 'edges': set()}

    def load_data(self):
        """Read the CSV at ``self.csv_path`` into ``self.df`` and print the row count."""
        frame = pd.read_csv(self.csv_path)
        self.df = frame
        print(f"Loaded {len(self.df)} rows from {self.csv_path}")

    def parse_hierarchy(self) -> Dict:
        """
        Build the kit hierarchy from the loaded table.

        The table is loaded first if ``self.df`` is still ``None``.
        Rows without a master kit id are ignored.

        Returns: Dictionary keyed by master kit id. Each entry holds
        'name', 'type' ('master'), 'subkits' (keyed by sub kit id, each with
        'name', 'type', 'prepacks' and 'dependencies'), 'dependencies', and,
        only when a prepack is attached straight to the master,
        'direct_prepacks'. The same object is stored in ``self.hierarchy_json``.
        """
        if self.df is None:
            self.load_data()

        def new_subkit():
            # Blank sub kit record.
            return {
                'name': '',
                'type': 'subkit',
                'prepacks': [],
                'dependencies': []
            }

        def new_master():
            # Blank master record; sub kits are created on first access.
            return {
                'name': '',
                'type': 'master',
                'subkits': defaultdict(new_subkit),
                'dependencies': []
            }

        def add_once(items, value):
            # Append while keeping first-seen order and no repeats.
            if value not in items:
                items.append(value)

        columns = ['Master Kit', 'Master Kit Description',
                   'Sub kit', 'Sub kit description',
                   'Prepack', 'Prepack Description']
        unique_rows = self.df[columns].drop_duplicates()

        tree = defaultdict(new_master)

        for _, record in unique_rows.iterrows():
            master_id = record['Master Kit']
            if pd.isna(master_id):
                continue

            master = tree[master_id]
            master_desc = record['Master Kit Description']
            # The description is rewritten on every row, so the last row wins.
            master['name'] = '' if pd.isna(master_desc) else master_desc

            subkit_id = record['Sub kit']
            prepack_id = record['Prepack']
            has_prepack = pd.notna(prepack_id)

            if pd.notna(subkit_id):
                subkit = master['subkits'][subkit_id]
                subkit_desc = record['Sub kit description']
                subkit['name'] = '' if pd.isna(subkit_desc) else subkit_desc
                # The master depends on each of its sub kits.
                add_once(master['dependencies'], subkit_id)

                if has_prepack:
                    # The sub kit both lists and depends on its prepacks.
                    add_once(subkit['prepacks'], prepack_id)
                    add_once(subkit['dependencies'], prepack_id)
            elif has_prepack:
                # No sub kit on this row: the prepack hangs off the master itself.
                add_once(master.setdefault('direct_prepacks', []), prepack_id)
                add_once(master['dependencies'], prepack_id)

        # A JSON round trip flattens the nested defaultdicts into plain dicts
        # (and, as JSON requires, turns any non-string keys into strings).
        serialized = json.dumps(tree, default=dict)
        self.hierarchy_json = json.loads(serialized)
        return self.hierarchy_json
def sort_products_by_hierarchy(product_list: List[str],
                               kit_levels: Dict[str, int],
                               kit_dependencies: Dict[str, List[str]]) -> List[str]:
    """
    Order products so that every product comes after its dependencies.

    Products that appear in ``kit_levels`` are topologically sorted (Kahn's
    algorithm). Products with no unmet dependency are taken in the order they
    appear in ``product_list``; dependents released by the same product are
    queued by (hierarchy level, name). Dependencies on products that are not
    in ``product_list`` are ignored. Repeated entries in ``product_list`` are
    collapsed to their first occurrence.

    If a dependency cycle prevents some products from being placed, a warning
    is printed and those products are appended sorted by (level, name).
    Products without hierarchy data are appended last, sorted by name.

    A short preview of the resulting order is printed to stdout.

    Args:
        product_list: Product names to sort.
        kit_levels: Maps product name to hierarchy level
            (0=prepack, 1=subkit, 2=master).
        kit_dependencies: Maps product name to the products that must be
            made before it.

    Returns:
        List of products in production order (dependencies first).
    """
    # Collapse duplicates while keeping first-seen order: a repeated product
    # would otherwise be counted twice as its own dependent and trigger a
    # false "circular dependency" warning.
    unique_products = list(dict.fromkeys(product_list))

    products_with_hierarchy = [p for p in unique_products if p in kit_levels]
    products_without_hierarchy = [p for p in unique_products if p not in kit_levels]
    if products_without_hierarchy:
        print(f"[HIERARCHY] Products without hierarchy data: {products_without_hierarchy}")

    # Set for O(1) membership tests inside the loops below.
    in_scope = set(products_with_hierarchy)

    # kit_dependencies says "product needs dep"; the graph stores the reverse
    # edge "dep is needed by product" so finishing dep can release product.
    graph = defaultdict(list)  # dep -> [dependents]
    in_degree = {product: 0 for product in products_with_hierarchy}
    for product in products_with_hierarchy:
        for dep in kit_dependencies.get(product) or []:
            if dep in in_scope:
                graph[dep].append(product)
                in_degree[product] += 1

    def level_then_name(product):
        return (kit_levels.get(product, 999), product)

    # Kahn's algorithm, seeded with everything that has no unmet dependency.
    sorted_products = []
    queue = deque(p for p in products_with_hierarchy if in_degree[p] == 0)
    while queue:
        current = queue.popleft()
        sorted_products.append(current)
        for dependent in sorted(graph[current], key=level_then_name):
            in_degree[dependent] -= 1
            if in_degree[dependent] == 0:
                queue.append(dependent)

    # Anything still unplaced is part of (or behind) a dependency cycle.
    if len(sorted_products) != len(products_with_hierarchy):
        placed = set(sorted_products)
        remaining = [p for p in products_with_hierarchy if p not in placed]
        print(f"[HIERARCHY] WARNING: Potential circular dependencies detected in: {remaining}")
        # Fall back to level order for the products that could not be sorted.
        sorted_products.extend(sorted(remaining, key=level_then_name))

    # Products without hierarchy information go at the end.
    sorted_products.extend(sorted(products_without_hierarchy))

    print(f"[HIERARCHY] Dependency-aware production order: {len(sorted_products)} products")

    # Imported here (once, not per product) to avoid a circular dependency.
    # The level name is only used for the log lines below, so any failure to
    # obtain KitLevel just means the generic label is printed instead.
    try:
        from src.config.constants import KitLevel as kit_level_cls
    except Exception:
        kit_level_cls = None

    for position, p in enumerate(sorted_products[:10], start=1):  # Show first 10
        level = kit_levels.get(p, "unknown")
        level_name = _kit_level_label(level, kit_level_cls)
        deps_in_list = [d for d in (kit_dependencies.get(p) or []) if d in in_scope]
        print(f" {position}. {p} (level {level}={level_name}, deps: {len(deps_in_list)})")
        if deps_in_list:
            print(f" Dependencies: {deps_in_list}")
    if len(sorted_products) > 10:
        print(f" ... and {len(sorted_products) - 10} more products")

    return sorted_products


def _kit_level_label(level, kit_level_cls) -> str:
    """Return a display name for *level*, or ``level_<level>`` if none can be resolved."""
    if kit_level_cls is not None:
        try:
            return kit_level_cls.get_name(level)
        except Exception:
            # Best effort only: unknown levels fall through to the generic label.
            return f"level_{level}"
    return f"level_{level}"
def main():
    """
    Demo the hierarchy parser.

    Parses the default kit composition CSV, writes the resulting hierarchy to
    data/hierarchy_exports/kit_hierarchy.json and prints the number of master
    kits found.
    """
    # Local import: only this script entry point needs pathlib.
    from pathlib import Path

    parser = KitHierarchyParser()
    print("🔄 Parsing kit hierarchy...")
    hierarchy = parser.parse_hierarchy()

    # Export to JSON. The export directory is created first so a fresh
    # checkout does not fail with FileNotFoundError.
    export_path = Path('data/hierarchy_exports/kit_hierarchy.json')
    export_path.parent.mkdir(parents=True, exist_ok=True)
    with open(export_path, 'w', encoding='utf-8') as f:
        json.dump(hierarchy, f, indent=4)

    print(f"📊 Found {len(hierarchy)} master kits")


if __name__ == "__main__":
    main()