Shami96 commited on
Commit
48fb6ed
·
verified ·
1 Parent(s): 543101d

Update updated_word.py

Browse files
Files changed (1) hide show
  1. updated_word.py +130 -52
updated_word.py CHANGED
@@ -648,7 +648,7 @@ def fix_management_summary_details_column(table, flat_json):
648
  # ============================================================================
649
  def fix_operator_declaration_empty_values(table, flat_json):
650
  """
651
- IMPROVED: Better operator declaration handling with more reliable position detection
652
  """
653
  replacements_made = 0
654
  print(f" 🎯 FIX: Operator Declaration empty values processing")
@@ -673,15 +673,38 @@ def fix_operator_declaration_empty_values(table, flat_json):
673
  if len(value) == 0:
674
  return None, None
675
  if len(value) == 1:
676
- return str(value[0]).strip(), None
 
 
 
 
 
 
677
 
678
- # Handle [name, position] pattern
679
- first = str(value[0]).strip()
680
- second = str(value[1]).strip()
681
- if first and second:
 
 
 
 
 
 
 
 
682
  return first, second
683
 
684
- # Join list elements
 
 
 
 
 
 
 
 
 
685
  value = " ".join(str(v).strip() for v in value if str(v).strip())
686
 
687
  s = str(value).strip()
@@ -704,7 +727,8 @@ def fix_operator_declaration_empty_values(table, flat_json):
704
  # Check which part is more likely to be a position
705
  role_indicators = ['manager', 'auditor', 'owner', 'director', 'supervisor',
706
  'coordinator', 'driver', 'operator', 'representative', 'chief',
707
- 'president', 'ceo', 'cfo', 'secretary', 'treasurer']
 
708
 
709
  right_has_role = any(ind in right.lower() for ind in role_indicators)
710
  left_has_role = any(ind in left.lower() for ind in role_indicators)
@@ -722,7 +746,7 @@ def fix_operator_declaration_empty_values(table, flat_json):
722
  if len(tokens) >= 2:
723
  last_token = tokens[-1].lower()
724
  role_indicators = ['manager', 'auditor', 'owner', 'director', 'supervisor',
725
- 'coordinator', 'driver', 'operator', 'representative', 'chief']
726
  if any(ind == last_token for ind in role_indicators):
727
  return " ".join(tokens[:-1]), tokens[-1]
728
 
@@ -795,60 +819,114 @@ def fix_operator_declaration_empty_values(table, flat_json):
795
  final_name = None
796
  final_position = None
797
 
798
- # Search strategies in order of preference
799
- search_strategies = [
800
- # Strategy 1: Direct operator declaration keys
801
- ("Operator Declaration.Print Name", "Operator Declaration.Position Title"),
802
-
803
- # Strategy 2: Generic print name/position keys
804
- ("Print Name", "Position Title"),
805
-
806
- # Strategy 3: Look in operator information section
807
- ("Operator Information.Print Name", "Operator Information.Position Title"),
808
 
809
- # Strategy 4: Any key containing "print name" or "position"
810
- (None, None) # Special case - will search all keys
811
- ]
 
 
 
 
 
 
 
 
 
812
 
813
- for name_key_pattern, pos_key_pattern in search_strategies:
814
- if final_name and final_position:
815
- break
 
 
 
816
 
817
- if name_key_pattern is None:
818
- # Search all keys for relevant data
819
- for key, value in flat_json.items():
820
- key_lower = key.lower()
821
-
822
- # Look for name-like keys
823
- if not final_name and ("print name" in key_lower or
824
- ("name" in key_lower and "operator" in key_lower)):
825
- if value and looks_like_person_name(str(value)):
826
- name_from_val, pos_from_val = parse_name_and_position(value)
827
- if name_from_val and looks_like_person_name(name_from_val):
828
- final_name = name_from_val
829
- if pos_from_val and looks_like_role(pos_from_val):
830
- final_position = pos_from_val
831
-
832
- # Look for position-like keys
833
- if not final_position and ("position" in key_lower or "title" in key_lower):
834
- if value and looks_like_role(str(value)):
835
- final_position = str(value).strip()
836
- else:
837
- # Search for specific key patterns
838
  name_kv = find_matching_json_key_and_value(name_key_pattern, flat_json)
839
  pos_kv = find_matching_json_key_and_value(pos_key_pattern, flat_json)
840
 
841
  if name_kv and name_kv[1]:
 
 
 
 
 
 
 
 
 
 
 
 
842
  name_from_val, pos_from_val = parse_name_and_position(name_kv[1])
843
  if name_from_val and looks_like_person_name(name_from_val):
844
- final_name = name_from_val
845
- if pos_from_val and looks_like_role(pos_from_val) and not final_position:
846
- final_position = pos_from_val
 
 
847
 
848
  if pos_kv and pos_kv[1] and not final_position:
849
- pos_val = str(pos_kv[1]).strip()
850
- if looks_like_role(pos_val):
851
- final_position = pos_val
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
852
 
853
  # Clean up final values
854
  if isinstance(final_name, (list, tuple)):
 
648
  # ============================================================================
649
  def fix_operator_declaration_empty_values(table, flat_json):
650
  """
651
+ FIXED: Properly distinguish between auditor and operator data for Operator Declaration table
652
  """
653
  replacements_made = 0
654
  print(f" 🎯 FIX: Operator Declaration empty values processing")
 
673
  if len(value) == 0:
674
  return None, None
675
  if len(value) == 1:
676
+ # Check if single item looks like "Name - Position" format
677
+ single_item = str(value[0]).strip()
678
+ if ' - ' in single_item:
679
+ parts = single_item.split(' - ', 1)
680
+ if len(parts) == 2:
681
+ return parts[0].strip(), parts[1].strip()
682
+ return single_item, None
683
 
684
+ # Handle [name, position] pattern or multiple attendance entries
685
+ if len(value) == 2:
686
+ first = str(value[0]).strip()
687
+ second = str(value[1]).strip()
688
+
689
+ # Check if both look like names (attendance list pattern)
690
+ if (' ' in first and ' ' in second and
691
+ not any(role in first.lower() for role in ['manager', 'director', 'auditor', 'officer']) and
692
+ not any(role in second.lower() for role in ['manager', 'director', 'auditor', 'officer'])):
693
+ # This is likely attendance list data, return first name only
694
+ return first, None
695
+
696
  return first, second
697
 
698
+ # Multiple items - check if it's attendance list format
699
+ attendance_like = any(' - ' in str(item) for item in value)
700
+ if attendance_like:
701
+ # Extract first person's name from attendance format
702
+ first_entry = str(value[0]).strip()
703
+ if ' - ' in first_entry:
704
+ return first_entry.split(' - ')[0].strip(), first_entry.split(' - ')[1].strip()
705
+ return first_entry, None
706
+
707
+ # Join list elements as fallback
708
  value = " ".join(str(v).strip() for v in value if str(v).strip())
709
 
710
  s = str(value).strip()
 
727
  # Check which part is more likely to be a position
728
  role_indicators = ['manager', 'auditor', 'owner', 'director', 'supervisor',
729
  'coordinator', 'driver', 'operator', 'representative', 'chief',
730
+ 'president', 'ceo', 'cfo', 'secretary', 'treasurer', 'officer',
731
+ 'compliance']
732
 
733
  right_has_role = any(ind in right.lower() for ind in role_indicators)
734
  left_has_role = any(ind in left.lower() for ind in role_indicators)
 
746
  if len(tokens) >= 2:
747
  last_token = tokens[-1].lower()
748
  role_indicators = ['manager', 'auditor', 'owner', 'director', 'supervisor',
749
+ 'coordinator', 'driver', 'operator', 'representative', 'chief', 'officer']
750
  if any(ind == last_token for ind in role_indicators):
751
  return " ".join(tokens[:-1]), tokens[-1]
752
 
 
819
  final_name = None
820
  final_position = None
821
 
822
+ # IMPROVED: Better strategy to find OPERATOR (not auditor) data
823
+ final_name = None
824
+ final_position = None
825
+
826
+ # Strategy 1: Look specifically in Attendance List for operator names
827
+ attendance_kv = find_matching_json_key_and_value("Attendance List (Names and Position Titles)", flat_json)
828
+ if attendance_kv and attendance_kv[1]:
829
+ attendance_data = attendance_kv[1]
830
+ print(f" 📋 Found attendance data: {attendance_data}")
 
831
 
832
+ # Parse attendance list to find non-auditor names
833
+ if isinstance(attendance_data, list):
834
+ for entry in attendance_data:
835
+ entry_str = str(entry).strip()
836
+ if 'auditor' not in entry_str.lower() and entry_str:
837
+ # Parse this entry for name and position
838
+ parsed_name, parsed_pos = parse_name_and_position(entry_str)
839
+ if parsed_name and looks_like_person_name(parsed_name):
840
+ final_name = parsed_name
841
+ if parsed_pos and looks_like_role(parsed_pos):
842
+ final_position = parsed_pos
843
+ break
844
 
845
+ # Strategy 2: If no good name from attendance, try nested attendance keys
846
+ if not final_name:
847
+ nested_attendance_kv = find_matching_json_key_and_value("Attendance List (Names and Position Titles).Attendance List (Names and Position Titles)", flat_json)
848
+ if nested_attendance_kv and nested_attendance_kv[1]:
849
+ nested_data = nested_attendance_kv[1]
850
+ print(f" 📋 Found nested attendance data: {nested_data}")
851
 
852
+ if isinstance(nested_data, list):
853
+ for entry in nested_data:
854
+ entry_str = str(entry).strip()
855
+ if 'auditor' not in entry_str.lower() and entry_str:
856
+ parsed_name, parsed_pos = parse_name_and_position(entry_str)
857
+ if parsed_name and looks_like_person_name(parsed_name):
858
+ final_name = parsed_name
859
+ if parsed_pos and looks_like_role(parsed_pos):
860
+ final_position = parsed_pos
861
+ break
862
+
863
+ # Strategy 3: Direct operator declaration keys (with filtering)
864
+ if not final_name:
865
+ search_strategies = [
866
+ ("Operator Declaration.Print Name", "Operator Declaration.Position Title"),
867
+ ("Print Name", "Position Title"),
868
+ ]
869
+
870
+ for name_key_pattern, pos_key_pattern in search_strategies:
 
 
871
  name_kv = find_matching_json_key_and_value(name_key_pattern, flat_json)
872
  pos_kv = find_matching_json_key_and_value(pos_key_pattern, flat_json)
873
 
874
  if name_kv and name_kv[1]:
875
+ # Filter out auditor names
876
+ potential_name = str(name_kv[1]).strip()
877
+
878
+ # Skip if this is clearly auditor data
879
+ if name_kv[0] and 'auditor' in name_kv[0].lower():
880
+ continue
881
+
882
+ # Skip common auditor names that appear in our data
883
+ auditor_names = ['greg dyer', 'greg', 'dyer']
884
+ if any(aud_name in potential_name.lower() for aud_name in auditor_names):
885
+ continue
886
+
887
  name_from_val, pos_from_val = parse_name_and_position(name_kv[1])
888
  if name_from_val and looks_like_person_name(name_from_val):
889
+ # Additional check - avoid auditor names
890
+ if not any(aud_name in name_from_val.lower() for aud_name in auditor_names):
891
+ final_name = name_from_val
892
+ if pos_from_val and looks_like_role(pos_from_val):
893
+ final_position = pos_from_val
894
 
895
  if pos_kv and pos_kv[1] and not final_position:
896
+ # Only use if key doesn't indicate auditor data
897
+ if not (pos_kv[0] and 'auditor' in pos_kv[0].lower()):
898
+ pos_val = str(pos_kv[1]).strip()
899
+ if looks_like_role(pos_val) and 'auditor' not in pos_val.lower():
900
+ final_position = pos_val
901
+
902
+ if final_name:
903
+ break
904
+
905
+ # Strategy 4: Last resort - search all keys but with strict filtering
906
+ if not final_name:
907
+ print(f" 🔍 Searching all keys with strict operator filtering...")
908
+ for key, value in flat_json.items():
909
+ key_lower = key.lower()
910
+
911
+ # Skip keys that clearly relate to auditor
912
+ if 'auditor' in key_lower:
913
+ continue
914
+
915
+ # Look for operator-related keys
916
+ if (("operator" in key_lower and "name" in key_lower) or
917
+ ("print name" in key_lower and "operator" in key_lower)):
918
+
919
+ if value and looks_like_person_name(str(value)):
920
+ potential_name = str(value).strip()
921
+ # Skip auditor names
922
+ auditor_names = ['greg dyer', 'greg', 'dyer']
923
+ if not any(aud_name in potential_name.lower() for aud_name in auditor_names):
924
+ name_from_val, pos_from_val = parse_name_and_position(value)
925
+ if name_from_val and looks_like_person_name(name_from_val):
926
+ final_name = name_from_val
927
+ if pos_from_val and looks_like_role(pos_from_val):
928
+ final_position = pos_from_val
929
+ break
930
 
931
  # Clean up final values
932
  if isinstance(final_name, (list, tuple)):