Spaces:
Running
Running
Update updated_word.py
Browse files- updated_word.py +130 -52
updated_word.py
CHANGED
|
@@ -648,7 +648,7 @@ def fix_management_summary_details_column(table, flat_json):
|
|
| 648 |
# ============================================================================
|
| 649 |
def fix_operator_declaration_empty_values(table, flat_json):
|
| 650 |
"""
|
| 651 |
-
|
| 652 |
"""
|
| 653 |
replacements_made = 0
|
| 654 |
print(f" 🎯 FIX: Operator Declaration empty values processing")
|
|
@@ -673,15 +673,38 @@ def fix_operator_declaration_empty_values(table, flat_json):
|
|
| 673 |
if len(value) == 0:
|
| 674 |
return None, None
|
| 675 |
if len(value) == 1:
|
| 676 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 677 |
|
| 678 |
-
# Handle [name, position] pattern
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 682 |
return first, second
|
| 683 |
|
| 684 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 685 |
value = " ".join(str(v).strip() for v in value if str(v).strip())
|
| 686 |
|
| 687 |
s = str(value).strip()
|
|
@@ -704,7 +727,8 @@ def fix_operator_declaration_empty_values(table, flat_json):
|
|
| 704 |
# Check which part is more likely to be a position
|
| 705 |
role_indicators = ['manager', 'auditor', 'owner', 'director', 'supervisor',
|
| 706 |
'coordinator', 'driver', 'operator', 'representative', 'chief',
|
| 707 |
-
'president', 'ceo', 'cfo', 'secretary', 'treasurer'
|
|
|
|
| 708 |
|
| 709 |
right_has_role = any(ind in right.lower() for ind in role_indicators)
|
| 710 |
left_has_role = any(ind in left.lower() for ind in role_indicators)
|
|
@@ -722,7 +746,7 @@ def fix_operator_declaration_empty_values(table, flat_json):
|
|
| 722 |
if len(tokens) >= 2:
|
| 723 |
last_token = tokens[-1].lower()
|
| 724 |
role_indicators = ['manager', 'auditor', 'owner', 'director', 'supervisor',
|
| 725 |
-
'coordinator', 'driver', 'operator', 'representative', 'chief']
|
| 726 |
if any(ind == last_token for ind in role_indicators):
|
| 727 |
return " ".join(tokens[:-1]), tokens[-1]
|
| 728 |
|
|
@@ -795,60 +819,114 @@ def fix_operator_declaration_empty_values(table, flat_json):
|
|
| 795 |
final_name = None
|
| 796 |
final_position = None
|
| 797 |
|
| 798 |
-
#
|
| 799 |
-
|
| 800 |
-
|
| 801 |
-
|
| 802 |
-
|
| 803 |
-
|
| 804 |
-
|
| 805 |
-
|
| 806 |
-
|
| 807 |
-
("Operator Information.Print Name", "Operator Information.Position Title"),
|
| 808 |
|
| 809 |
-
#
|
| 810 |
-
(
|
| 811 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 812 |
|
| 813 |
-
|
| 814 |
-
|
| 815 |
-
|
|
|
|
|
|
|
|
|
|
| 816 |
|
| 817 |
-
|
| 818 |
-
|
| 819 |
-
|
| 820 |
-
|
| 821 |
-
|
| 822 |
-
|
| 823 |
-
|
| 824 |
-
|
| 825 |
-
|
| 826 |
-
|
| 827 |
-
|
| 828 |
-
|
| 829 |
-
|
| 830 |
-
|
| 831 |
-
|
| 832 |
-
|
| 833 |
-
|
| 834 |
-
|
| 835 |
-
|
| 836 |
-
else:
|
| 837 |
-
# Search for specific key patterns
|
| 838 |
name_kv = find_matching_json_key_and_value(name_key_pattern, flat_json)
|
| 839 |
pos_kv = find_matching_json_key_and_value(pos_key_pattern, flat_json)
|
| 840 |
|
| 841 |
if name_kv and name_kv[1]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 842 |
name_from_val, pos_from_val = parse_name_and_position(name_kv[1])
|
| 843 |
if name_from_val and looks_like_person_name(name_from_val):
|
| 844 |
-
|
| 845 |
-
if
|
| 846 |
-
|
|
|
|
|
|
|
| 847 |
|
| 848 |
if pos_kv and pos_kv[1] and not final_position:
|
| 849 |
-
|
| 850 |
-
if
|
| 851 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 852 |
|
| 853 |
# Clean up final values
|
| 854 |
if isinstance(final_name, (list, tuple)):
|
|
|
|
| 648 |
# ============================================================================
|
| 649 |
def fix_operator_declaration_empty_values(table, flat_json):
|
| 650 |
"""
|
| 651 |
+
FIXED: Properly distinguish between auditor and operator data for Operator Declaration table
|
| 652 |
"""
|
| 653 |
replacements_made = 0
|
| 654 |
print(f" 🎯 FIX: Operator Declaration empty values processing")
|
|
|
|
| 673 |
if len(value) == 0:
|
| 674 |
return None, None
|
| 675 |
if len(value) == 1:
|
| 676 |
+
# Check if single item looks like "Name - Position" format
|
| 677 |
+
single_item = str(value[0]).strip()
|
| 678 |
+
if ' - ' in single_item:
|
| 679 |
+
parts = single_item.split(' - ', 1)
|
| 680 |
+
if len(parts) == 2:
|
| 681 |
+
return parts[0].strip(), parts[1].strip()
|
| 682 |
+
return single_item, None
|
| 683 |
|
| 684 |
+
# Handle [name, position] pattern or multiple attendance entries
|
| 685 |
+
if len(value) == 2:
|
| 686 |
+
first = str(value[0]).strip()
|
| 687 |
+
second = str(value[1]).strip()
|
| 688 |
+
|
| 689 |
+
# Check if both look like names (attendance list pattern)
|
| 690 |
+
if (' ' in first and ' ' in second and
|
| 691 |
+
not any(role in first.lower() for role in ['manager', 'director', 'auditor', 'officer']) and
|
| 692 |
+
not any(role in second.lower() for role in ['manager', 'director', 'auditor', 'officer'])):
|
| 693 |
+
# This is likely attendance list data, return first name only
|
| 694 |
+
return first, None
|
| 695 |
+
|
| 696 |
return first, second
|
| 697 |
|
| 698 |
+
# Multiple items - check if it's attendance list format
|
| 699 |
+
attendance_like = any(' - ' in str(item) for item in value)
|
| 700 |
+
if attendance_like:
|
| 701 |
+
# Extract first person's name from attendance format
|
| 702 |
+
first_entry = str(value[0]).strip()
|
| 703 |
+
if ' - ' in first_entry:
|
| 704 |
+
return first_entry.split(' - ')[0].strip(), first_entry.split(' - ')[1].strip()
|
| 705 |
+
return first_entry, None
|
| 706 |
+
|
| 707 |
+
# Join list elements as fallback
|
| 708 |
value = " ".join(str(v).strip() for v in value if str(v).strip())
|
| 709 |
|
| 710 |
s = str(value).strip()
|
|
|
|
| 727 |
# Check which part is more likely to be a position
|
| 728 |
role_indicators = ['manager', 'auditor', 'owner', 'director', 'supervisor',
|
| 729 |
'coordinator', 'driver', 'operator', 'representative', 'chief',
|
| 730 |
+
'president', 'ceo', 'cfo', 'secretary', 'treasurer', 'officer',
|
| 731 |
+
'compliance']
|
| 732 |
|
| 733 |
right_has_role = any(ind in right.lower() for ind in role_indicators)
|
| 734 |
left_has_role = any(ind in left.lower() for ind in role_indicators)
|
|
|
|
| 746 |
if len(tokens) >= 2:
|
| 747 |
last_token = tokens[-1].lower()
|
| 748 |
role_indicators = ['manager', 'auditor', 'owner', 'director', 'supervisor',
|
| 749 |
+
'coordinator', 'driver', 'operator', 'representative', 'chief', 'officer']
|
| 750 |
if any(ind == last_token for ind in role_indicators):
|
| 751 |
return " ".join(tokens[:-1]), tokens[-1]
|
| 752 |
|
|
|
|
| 819 |
final_name = None
|
| 820 |
final_position = None
|
| 821 |
|
| 822 |
+
# IMPROVED: Better strategy to find OPERATOR (not auditor) data
|
| 823 |
+
final_name = None
|
| 824 |
+
final_position = None
|
| 825 |
+
|
| 826 |
+
# Strategy 1: Look specifically in Attendance List for operator names
|
| 827 |
+
attendance_kv = find_matching_json_key_and_value("Attendance List (Names and Position Titles)", flat_json)
|
| 828 |
+
if attendance_kv and attendance_kv[1]:
|
| 829 |
+
attendance_data = attendance_kv[1]
|
| 830 |
+
print(f" 📋 Found attendance data: {attendance_data}")
|
|
|
|
| 831 |
|
| 832 |
+
# Parse attendance list to find non-auditor names
|
| 833 |
+
if isinstance(attendance_data, list):
|
| 834 |
+
for entry in attendance_data:
|
| 835 |
+
entry_str = str(entry).strip()
|
| 836 |
+
if 'auditor' not in entry_str.lower() and entry_str:
|
| 837 |
+
# Parse this entry for name and position
|
| 838 |
+
parsed_name, parsed_pos = parse_name_and_position(entry_str)
|
| 839 |
+
if parsed_name and looks_like_person_name(parsed_name):
|
| 840 |
+
final_name = parsed_name
|
| 841 |
+
if parsed_pos and looks_like_role(parsed_pos):
|
| 842 |
+
final_position = parsed_pos
|
| 843 |
+
break
|
| 844 |
|
| 845 |
+
# Strategy 2: If no good name from attendance, try nested attendance keys
|
| 846 |
+
if not final_name:
|
| 847 |
+
nested_attendance_kv = find_matching_json_key_and_value("Attendance List (Names and Position Titles).Attendance List (Names and Position Titles)", flat_json)
|
| 848 |
+
if nested_attendance_kv and nested_attendance_kv[1]:
|
| 849 |
+
nested_data = nested_attendance_kv[1]
|
| 850 |
+
print(f" 📋 Found nested attendance data: {nested_data}")
|
| 851 |
|
| 852 |
+
if isinstance(nested_data, list):
|
| 853 |
+
for entry in nested_data:
|
| 854 |
+
entry_str = str(entry).strip()
|
| 855 |
+
if 'auditor' not in entry_str.lower() and entry_str:
|
| 856 |
+
parsed_name, parsed_pos = parse_name_and_position(entry_str)
|
| 857 |
+
if parsed_name and looks_like_person_name(parsed_name):
|
| 858 |
+
final_name = parsed_name
|
| 859 |
+
if parsed_pos and looks_like_role(parsed_pos):
|
| 860 |
+
final_position = parsed_pos
|
| 861 |
+
break
|
| 862 |
+
|
| 863 |
+
# Strategy 3: Direct operator declaration keys (with filtering)
|
| 864 |
+
if not final_name:
|
| 865 |
+
search_strategies = [
|
| 866 |
+
("Operator Declaration.Print Name", "Operator Declaration.Position Title"),
|
| 867 |
+
("Print Name", "Position Title"),
|
| 868 |
+
]
|
| 869 |
+
|
| 870 |
+
for name_key_pattern, pos_key_pattern in search_strategies:
|
|
|
|
|
|
|
| 871 |
name_kv = find_matching_json_key_and_value(name_key_pattern, flat_json)
|
| 872 |
pos_kv = find_matching_json_key_and_value(pos_key_pattern, flat_json)
|
| 873 |
|
| 874 |
if name_kv and name_kv[1]:
|
| 875 |
+
# Filter out auditor names
|
| 876 |
+
potential_name = str(name_kv[1]).strip()
|
| 877 |
+
|
| 878 |
+
# Skip if this is clearly auditor data
|
| 879 |
+
if name_kv[0] and 'auditor' in name_kv[0].lower():
|
| 880 |
+
continue
|
| 881 |
+
|
| 882 |
+
# Skip common auditor names that appear in our data
|
| 883 |
+
auditor_names = ['greg dyer', 'greg', 'dyer']
|
| 884 |
+
if any(aud_name in potential_name.lower() for aud_name in auditor_names):
|
| 885 |
+
continue
|
| 886 |
+
|
| 887 |
name_from_val, pos_from_val = parse_name_and_position(name_kv[1])
|
| 888 |
if name_from_val and looks_like_person_name(name_from_val):
|
| 889 |
+
# Additional check - avoid auditor names
|
| 890 |
+
if not any(aud_name in name_from_val.lower() for aud_name in auditor_names):
|
| 891 |
+
final_name = name_from_val
|
| 892 |
+
if pos_from_val and looks_like_role(pos_from_val):
|
| 893 |
+
final_position = pos_from_val
|
| 894 |
|
| 895 |
if pos_kv and pos_kv[1] and not final_position:
|
| 896 |
+
# Only use if key doesn't indicate auditor data
|
| 897 |
+
if not (pos_kv[0] and 'auditor' in pos_kv[0].lower()):
|
| 898 |
+
pos_val = str(pos_kv[1]).strip()
|
| 899 |
+
if looks_like_role(pos_val) and 'auditor' not in pos_val.lower():
|
| 900 |
+
final_position = pos_val
|
| 901 |
+
|
| 902 |
+
if final_name:
|
| 903 |
+
break
|
| 904 |
+
|
| 905 |
+
# Strategy 4: Last resort - search all keys but with strict filtering
|
| 906 |
+
if not final_name:
|
| 907 |
+
print(f" 🔍 Searching all keys with strict operator filtering...")
|
| 908 |
+
for key, value in flat_json.items():
|
| 909 |
+
key_lower = key.lower()
|
| 910 |
+
|
| 911 |
+
# Skip keys that clearly relate to auditor
|
| 912 |
+
if 'auditor' in key_lower:
|
| 913 |
+
continue
|
| 914 |
+
|
| 915 |
+
# Look for operator-related keys
|
| 916 |
+
if (("operator" in key_lower and "name" in key_lower) or
|
| 917 |
+
("print name" in key_lower and "operator" in key_lower)):
|
| 918 |
+
|
| 919 |
+
if value and looks_like_person_name(str(value)):
|
| 920 |
+
potential_name = str(value).strip()
|
| 921 |
+
# Skip auditor names
|
| 922 |
+
auditor_names = ['greg dyer', 'greg', 'dyer']
|
| 923 |
+
if not any(aud_name in potential_name.lower() for aud_name in auditor_names):
|
| 924 |
+
name_from_val, pos_from_val = parse_name_and_position(value)
|
| 925 |
+
if name_from_val and looks_like_person_name(name_from_val):
|
| 926 |
+
final_name = name_from_val
|
| 927 |
+
if pos_from_val and looks_like_role(pos_from_val):
|
| 928 |
+
final_position = pos_from_val
|
| 929 |
+
break
|
| 930 |
|
| 931 |
# Clean up final values
|
| 932 |
if isinstance(final_name, (list, tuple)):
|