File size: 13,832 Bytes
ffa1f50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
#!/usr/bin/env python3
"""
Demand Data Validation Visualization Module

Provides Streamlit visualization for demand data validation.
Shows which products are included/excluded from optimization and why.
"""

import pandas as pd
import streamlit as st
from typing import Dict
from src.config.constants import LineType
from src.demand_filtering import DemandFilter


# Labels shown in the "Level" column, keyed by product type.
# 'master' is nested because its label depends on whether the master is
# standalone or part of a hierarchy.
LEVEL_NAMES = dict(
    prepack='prepack',
    subkit='subkit',
    master=dict(
        standalone='standalone_master',
        with_hierarchy='master_with_hierarchy',
    ),
    unclassified='no_hierarchy_data',
)


class DemandValidationViz:
    """
    Visualization wrapper for demand filtering results.

    All filtering logic lives in DemandFilter; this class only reshapes the
    analysis it returns into a displayable DataFrame and summary statistics.
    """

    # Markers used in the yes/no columns; the summary counters match on _NO.
    _YES = "✅"
    _NO = "❌"

    # Column order of the validation DataFrame. Also used to build a frame
    # with the right schema when there are no products at all.
    _COLUMNS = [
        'Product ID',
        'Demand',
        'Product Type',
        'Level',
        'Is Standalone Master',
        'Line Type ID',
        'Line Type',
        'UNICEF Staff',
        'Humanizer Staff',
        'Total Staff',
        'Production Speed (units/hour)',
        'Production Hours Needed',
        'Has Line Assignment',
        'Has Staffing Data',
        'Has Speed Data',
        'Has Hierarchy Data',
        'Excluded from Optimization',
        'Exclusion Reasons',
        'Data Quality Issues',
        'Has Too High Demand',
        'Validation Status',
    ]

    def __init__(self):
        self.filter_instance = DemandFilter()
        self.speed_data = None

    def load_data(self):
        """
        Load speed data and the filter's own data.

        Returns:
            The result of ``DemandFilter.load_data()``, or ``False`` if any
            step raised (the error is printed and shown via ``st.error``).
        """
        try:
            # NOTE(review): optimization_config is not used here; the import is
            # kept in case importing it has side effects - confirm and remove.
            from src.config import optimization_config  # noqa: F401
            from src.preprocess import extract
            self.speed_data = extract.read_package_speed_data()
            return self.filter_instance.load_data()
        except Exception as e:
            # Top-level UI boundary: report the failure instead of crashing the page.
            error_msg = f"Error loading data: {str(e)}"
            print(error_msg)
            st.error(error_msg)
            return False

    def validate_all_products(self) -> pd.DataFrame:
        """
        Create a DataFrame with validation results for all products.

        Converts the per-product details from
        ``DemandFilter.get_complete_product_analysis()`` into one row per
        product, sorted with excluded products first and then by descending
        demand.

        Returns:
            DataFrame with the columns listed in ``_COLUMNS``; empty (but with
            those columns) when the analysis contains no products.
        """
        analysis = self.filter_instance.get_complete_product_analysis()
        product_details = analysis['product_details']

        results = []
        for product_id, details in product_details.items():
            # Production hours can only be derived when a positive speed is known.
            speed = self.speed_data.get(product_id) if self.speed_data else None
            production_hours = (details['demand'] / speed) if speed and speed > 0 else None

            line_type_id = details['line_assignment']
            line_name = LineType.get_name(line_type_id) if line_type_id is not None else "no_assignment"

            ptype = details['product_type']
            if ptype == 'unclassified':
                level_name = LEVEL_NAMES['unclassified']
            elif ptype == 'master':
                master_kind = 'standalone' if details['is_standalone_master'] else 'with_hierarchy'
                level_name = LEVEL_NAMES['master'][master_kind]
            else:
                level_name = LEVEL_NAMES.get(ptype, f"level_{ptype}")

            included = details['is_included_in_optimization']
            exclusion_reasons = details['exclusion_reasons']
            too_high_demand = details['has_too_high_demand']

            # Reset per product so an excluded product never sees (or crashes
            # on) the issue list of a previously processed product.
            issues = []
            if included:
                if speed is None:
                    issues.append("missing_speed_data (will use default)")
                if not details['has_hierarchy']:
                    issues.append("no_hierarchy_data")
                if too_high_demand:
                    issues.append("too_high_demand")
                if issues:
                    validation_status = f"⚠️ Data Issues: {', '.join(issues)}"
                else:
                    validation_status = "✅ Ready for optimization"
            else:
                # Excluded products report the filter's reasons; the too-high
                # demand flag stays visible in its own column.
                validation_status = f"🚫 Excluded: {', '.join(exclusion_reasons)}"

            results.append({
                'Product ID': product_id,
                'Demand': details['demand'],
                'Product Type': ptype.title(),
                'Level': level_name,
                'Is Standalone Master': "Yes" if details['is_standalone_master'] else "No",
                'Line Type ID': line_type_id if line_type_id else "N/A",
                'Line Type': line_name,
                'UNICEF Staff': details['unicef_staff'],
                'Humanizer Staff': details['humanizer_staff'],
                'Total Staff': details['total_staff'],
                'Production Speed (units/hour)': f"{speed:.1f}" if speed else "N/A",
                'Production Hours Needed': f"{production_hours:.1f}" if production_hours else "N/A",
                'Has Line Assignment': self._YES if details['has_line_assignment'] else self._NO,
                'Has Staffing Data': self._YES if details['has_staffing'] else self._NO,
                'Has Speed Data': self._YES if speed is not None else f"{self._NO} (will use default)",
                'Has Hierarchy Data': self._YES if details['has_hierarchy'] else self._NO,
                'Excluded from Optimization': not included,
                'Exclusion Reasons': ', '.join(exclusion_reasons) if exclusion_reasons else '',
                'Data Quality Issues': ', '.join(issues),
                'Has Too High Demand': self._YES if too_high_demand else self._NO,
                'Validation Status': validation_status
            })

        # Passing the schema keeps sort_values working when results is empty.
        df = pd.DataFrame(results, columns=self._COLUMNS)
        df = df.sort_values(['Excluded from Optimization', 'Demand'], ascending=[False, False])
        return df

    def get_summary_statistics(self, df: pd.DataFrame) -> Dict:
        """
        Calculate summary statistics from validation results.

        Args:
            df: DataFrame produced by ``validate_all_products``.

        Returns:
            Dict combining totals taken from the filter's analysis with
            missing-data counts computed over the included rows of ``df``.
            Staff totals are summed over every product in the analysis,
            included or not.
        """
        analysis = self.filter_instance.get_complete_product_analysis()
        included_df = df[~df['Excluded from Optimization'].astype(bool)]
        all_details = list(analysis['product_details'].values())

        def count_exact_no(column: str) -> int:
            # Number of included rows whose flag column is exactly the "no" marker.
            return int((included_df[column] == self._NO).sum())

        # The speed column carries a suffix ("... (will use default)"), so it
        # is matched by substring rather than equality.
        no_speed = int(included_df['Has Speed Data'].str.contains(self._NO, regex=False).sum())

        return {
            'total_products': analysis['total_products'],
            'total_demand': analysis['total_demand'],
            'included_products': analysis['included_count'],
            'excluded_products': analysis['excluded_count'],
            'included_demand': analysis['included_demand'],
            'excluded_demand': analysis['excluded_demand'],
            'type_counts': df['Product Type'].value_counts().to_dict(),
            'no_line_assignment': count_exact_no('Has Line Assignment'),
            'no_staffing': count_exact_no('Has Staffing Data'),
            'no_speed': no_speed,
            'no_hierarchy': count_exact_no('Has Hierarchy Data'),
            'standalone_masters': analysis['standalone_masters_count'],
            'total_unicef_needed': sum(p['unicef_staff'] for p in all_details),
            'total_humanizer_needed': sum(p['humanizer_staff'] for p in all_details),
            'excluded_with_too_high_demand': analysis['excluded_with_too_high_demand_count']
        }


def display_demand_validation():
    """
    Display demand validation analysis in Streamlit.

    Main entry point for the validation page. Loads data through
    DemandValidationViz, then renders summary metrics, the product type
    distribution, data quality counters, the included/excluded product tables
    and a list of recommendations. Returns early (after showing an error) when
    the data cannot be loaded.
    """
    st.header("📋 Demand Data Validation")
    st.markdown("Analysis showing which products are included/excluded from optimization and data quality status.")

    # Load and analyze data
    validator = DemandValidationViz()
    with st.spinner("Loading and analyzing data..."):
        if not validator.load_data():
            st.error("Failed to load data for validation.")
            return
        validation_df = validator.validate_all_products()
        stats = validator.get_summary_statistics(validation_df)

    # ===== SUMMARY METRICS =====
    st.subheader("📊 Summary Statistics")
    col1, col2, col3, col4 = st.columns(4)
    col1.metric("Total Products", stats['total_products'])
    col1.metric("Included in Optimization", stats['included_products'], delta="Ready")
    col2.metric("Total Demand", f"{stats['total_demand']:,}")
    col2.metric("Excluded from Optimization", stats['excluded_products'], delta="Omitted")
    col3.metric("Included Demand", f"{stats['included_demand']:,}", delta="Will be optimized")
    col3.metric("UNICEF Staff Needed", stats['total_unicef_needed'])
    col4.metric("Excluded Demand", f"{stats['excluded_demand']:,}", delta="Omitted")
    col4.metric("Humanizer Staff Needed", stats['total_humanizer_needed'])

    # ===== PRODUCT TYPE DISTRIBUTION =====
    st.subheader("📈 Product Type Distribution")
    if stats['type_counts']:
        col1, col2 = st.columns(2)
        with col1:
            type_df = pd.DataFrame(list(stats['type_counts'].items()), columns=['Product Type', 'Count'])
            st.bar_chart(type_df.set_index('Product Type'))
        with col2:
            total_products = stats['total_products']
            for ptype, count in stats['type_counts'].items():
                # Guard against a zero total so the page never divides by zero.
                percentage = (count / total_products) * 100 if total_products else 0.0
                st.write(f"**{ptype}:** {count} products ({percentage:.1f}%)")

    # ===== DATA QUALITY ISSUES (for included products only) =====
    st.subheader("⚠️ Data Quality Issues (Included Products)")
    st.write("Issues affecting products that **will be** included in optimization:")
    # (label, stats key, delta text shown when the count is non-zero)
    quality_metrics = [
        ("No Line Assignment", 'no_line_assignment', "Issue"),
        ("No Staffing Data", 'no_staffing', "Issue"),
        ("No Speed Data", 'no_speed', "Will use default"),
        ("No Hierarchy Data", 'no_hierarchy', "Issue"),
        ("Excluded: Too High Demand", 'excluded_with_too_high_demand', "Excluded"),
    ]
    # One column per metric, so every metric has a column to render into.
    for column, (label, key, flag) in zip(st.columns(len(quality_metrics)), quality_metrics):
        column.metric(label, stats[key], delta=None if stats[key] == 0 else flag)

    # ===== INCLUDED PRODUCTS TABLE =====
    excluded_mask = validation_df['Excluded from Optimization'].astype(bool)
    included_df = validation_df[~excluded_mask].copy()
    excluded_df = validation_df[excluded_mask].copy()

    st.subheader("✅ Products Included in Optimization")
    st.write(f"**{len(included_df)} products** with total demand of **{included_df['Demand'].sum():,} units**")

    if len(included_df) > 0:
        # Filters
        col1, col2 = st.columns(2)
        type_filter = col1.selectbox("Filter by type", ["All"] + list(included_df['Product Type'].unique()), key="inc_filter")
        min_demand = col2.number_input("Minimum demand", min_value=0, value=0, key="inc_demand")

        # Apply filters
        filtered = included_df.copy()
        if type_filter != "All":
            filtered = filtered[filtered['Product Type'] == type_filter]
        if min_demand > 0:
            filtered = filtered[filtered['Demand'] >= min_demand]

        # Display table
        display_cols = ['Product ID', 'Demand', 'Product Type', 'Line Type', 'UNICEF Staff',
                        'Humanizer Staff', 'Production Speed (units/hour)', 'Data Quality Issues', 'Validation Status']
        st.dataframe(filtered[display_cols], use_container_width=True, height=300)
    else:
        st.warning("No products are included in optimization!")

    # ===== EXCLUDED PRODUCTS TABLE =====
    st.subheader("🚫 Products Excluded from Optimization")
    st.write(f"**{len(excluded_df)} products** with total demand of **{excluded_df['Demand'].sum():,} units**")
    st.info("Excluded due to: missing line assignments, zero staffing, or non-standalone masters")

    if len(excluded_df) > 0:
        # Show exclusion breakdown
        st.write("**Exclusion reasons:**")
        for reason, count in excluded_df['Exclusion Reasons'].value_counts().items():
            st.write(f"• {reason}: {count} products")

        # Display table
        display_cols = ['Product ID', 'Demand', 'Product Type', 'Exclusion Reasons',
                        'UNICEF Staff', 'Humanizer Staff', 'Line Type']
        st.dataframe(excluded_df[display_cols], use_container_width=True, height=200)

        # Export: a single download button. A download button nested inside
        # `if st.button(...)` only exists for the one rerun after the click.
        st.download_button("📥 Export Validation Results to CSV", validation_df.to_csv(index=False),
                           file_name="demand_validation_results.csv", mime="text/csv")

    # ===== RECOMMENDATIONS =====
    st.subheader("💡 Recommendations")

    if stats['excluded_products'] > 0:
        st.warning(f"**{stats['excluded_products']} products** ({stats['excluded_demand']:,} units) excluded from optimization")

    # Show data quality issues for included products
    if stats['no_line_assignment'] > 0:
        st.info(f"**Line Assignment**: {stats['no_line_assignment']} included products missing line assignments")
    if stats['no_staffing'] > 0:
        st.info(f"**Staffing Data**: {stats['no_staffing']} included products missing staffing requirements")
    if stats['no_speed'] > 0:
        st.info(f"**Speed Data**: {stats['no_speed']} included products missing speed data (will use default 106.7 units/hour)")
    if stats['no_hierarchy'] > 0:
        st.info(f"**Hierarchy Data**: {stats['no_hierarchy']} included products not in kit hierarchy")

    # Overall status
    if stats['included_products'] > 0:
        st.success(f"✅ **{stats['included_products']} products** with {stats['included_demand']:,} units demand ready for optimization!")
        if stats['no_speed'] == 0 and stats['no_hierarchy'] == 0:
            st.info("🎉 All included products have complete data!")
    else:
        st.error("❌ No products passed filtering. Review exclusion reasons and check data configuration.")


if __name__ == "__main__":
    # For testing: render the validation page when this module is executed
    # directly rather than imported.
    display_demand_validation()