Shami96 committed on
Commit
b93e8d9
·
verified ·
1 Parent(s): 758c040

Update updated_word.py

Browse files
Files changed (1) hide show
  1. updated_word.py +55 -458
updated_word.py CHANGED
@@ -3,7 +3,7 @@ from docx import Document
3
  from docx.shared import RGBColor
4
  import re
5
 
6
- # Your original heading patterns (unchanged)
7
  HEADING_PATTERNS = {
8
  "main": [
9
  r"NHVAS\s+Audit\s+Summary\s+Report",
@@ -27,6 +27,10 @@ HEADING_PATTERNS = {
27
  ]
28
  }
29
 
 
 
 
 
30
  def load_json(filepath):
31
  with open(filepath, 'r') as file:
32
  return json.load(file)
@@ -60,8 +64,32 @@ def get_value_as_string(value, field_name=""):
60
  else:
61
  return str(value)
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  def find_matching_json_value(field_name, flat_json):
64
- """Your original matching function with minimal improvements"""
65
  field_name = field_name.strip()
66
 
67
  # Try exact match first
@@ -75,13 +103,11 @@ def find_matching_json_value(field_name, flat_json):
75
  print(f" βœ… Case-insensitive match found for key '{field_name}' with JSON key '{key}'")
76
  return value
77
 
78
- # 🎯 MINIMAL IMPROVEMENT: Better Print Name detection for operator vs auditor
79
  if field_name.lower().strip() == "print name":
80
- # Look in the flat_json keys to see what context we're in
81
  operator_keys = [k for k in flat_json.keys() if "operator" in k.lower() and "print name" in k.lower()]
82
  auditor_keys = [k for k in flat_json.keys() if "auditor" in k.lower() and ("print name" in k.lower() or "name" in k.lower())]
83
 
84
- # If we have operator-specific keys, prefer those in operator context
85
  if operator_keys:
86
  print(f" βœ… Operator Print Name match: '{field_name}' -> '{operator_keys[0]}'")
87
  return flat_json[operator_keys[0]]
@@ -143,22 +169,12 @@ def find_matching_json_value(field_name, flat_json):
143
  print(f" ❌ No match found for '{field_name}'")
144
  return None
145
 
146
- def get_clean_text(cell):
147
- text = ""
148
- for paragraph in cell.paragraphs:
149
- for run in paragraph.runs:
150
- text += run.text
151
- return text.strip()
152
-
153
- def has_red_text(cell):
154
- for paragraph in cell.paragraphs:
155
- for run in paragraph.runs:
156
- if is_red(run) and run.text.strip():
157
- return True
158
- return False
159
 
160
  def extract_red_text_segments(cell):
161
- """Your original red text extraction (unchanged)"""
162
  red_segments = []
163
 
164
  for para_idx, paragraph in enumerate(cell.paragraphs):
@@ -191,27 +207,8 @@ def extract_red_text_segments(cell):
191
 
192
  return red_segments
193
 
194
- def replace_red_text_in_cell(cell, replacement_text):
195
- """Your original replacement function (unchanged)"""
196
- red_segments = extract_red_text_segments(cell)
197
-
198
- if not red_segments:
199
- return 0
200
-
201
- if len(red_segments) > 1:
202
- replacements_made = 0
203
- for segment in red_segments:
204
- segment_text = segment['text'].strip()
205
- if segment_text:
206
- pass
207
-
208
- if replacements_made == 0:
209
- return replace_all_red_segments(red_segments, replacement_text)
210
-
211
- return replace_all_red_segments(red_segments, replacement_text)
212
-
213
  def replace_all_red_segments(red_segments, replacement_text):
214
- """Your original function (unchanged)"""
215
  if not red_segments:
216
  return 0
217
 
@@ -244,7 +241,7 @@ def replace_all_red_segments(red_segments, replacement_text):
244
 
245
  for line in replacement_lines[1:]:
246
  if line.strip():
247
- from docx.oxml import OxmlElement, ns
248
  br = OxmlElement('w:br')
249
  first_run.element.append(br)
250
 
@@ -259,7 +256,7 @@ def replace_all_red_segments(red_segments, replacement_text):
259
  return replacements_made
260
 
261
  def replace_single_segment(segment, replacement_text):
262
- """Your original function (unchanged)"""
263
  if not segment['runs']:
264
  return False
265
 
@@ -272,197 +269,21 @@ def replace_single_segment(segment, replacement_text):
272
 
273
  return True
274
 
275
- def handle_multiple_red_segments_in_cell(cell, flat_json):
276
- """Your original function (unchanged)"""
277
  red_segments = extract_red_text_segments(cell)
278
 
279
  if not red_segments:
280
  return 0
281
 
282
- print(f" πŸ” Found {len(red_segments)} red text segments in cell")
283
- replacements_made = 0
284
- unmatched_segments = []
285
-
286
- for i, segment in enumerate(red_segments):
287
- segment_text = segment['text'].strip()
288
- if not segment_text:
289
- continue
290
-
291
- print(f" Segment {i+1}: '{segment_text[:50]}...'")
292
-
293
- json_value = find_matching_json_value(segment_text, flat_json)
294
-
295
- if json_value is not None:
296
- replacement_text = get_value_as_string(json_value, segment_text)
297
-
298
- if isinstance(json_value, list) and len(json_value) > 1:
299
- replacement_text = "\n".join(str(item) for item in json_value if str(item).strip())
300
-
301
- success = replace_single_segment(segment, replacement_text)
302
- if success:
303
- replacements_made += 1
304
- print(f" βœ… Replaced segment '{segment_text[:30]}...' with '{replacement_text[:30]}...'")
305
- else:
306
- unmatched_segments.append(segment)
307
- print(f" ⏳ No individual match for segment '{segment_text[:30]}...'")
308
-
309
- if unmatched_segments and replacements_made == 0:
310
- combined_text = " ".join(seg['text'] for seg in red_segments).strip()
311
- print(f" πŸ”„ Trying combined text match: '{combined_text[:50]}...'")
312
-
313
- json_value = find_matching_json_value(combined_text, flat_json)
314
- if json_value is not None:
315
- replacement_text = get_value_as_string(json_value, combined_text)
316
- if isinstance(json_value, list) and len(json_value) > 1:
317
- replacement_text = "\n".join(str(item) for item in json_value if str(item).strip())
318
-
319
- replacements_made = replace_all_red_segments(red_segments, replacement_text)
320
- print(f" βœ… Replaced combined text with '{replacement_text[:50]}...'")
321
-
322
- return replacements_made
323
 
324
- # 🎯 SURGICAL FIX 1: Handle Nature of Business multi-line red text
325
- def handle_nature_business_multiline_fix(cell, flat_json):
326
- """SURGICAL FIX: Handle multi-line red text in Nature of Business cells"""
327
- if not has_red_text(cell):
328
- return 0
329
-
330
- # Check if this cell contains "Nature of the Operators Business"
331
- cell_text = get_clean_text(cell).lower()
332
- if "nature of the operators business" not in cell_text and "nature of the operator business" not in cell_text:
333
- return 0
334
-
335
- print(f" 🎯 SURGICAL FIX: Nature of Business multi-line processing")
336
-
337
- # Look for sub-fields like "Accreditation Number:" and "Expiry Date:"
338
- red_segments = extract_red_text_segments(cell)
339
- replacements_made = 0
340
-
341
- # Try to replace each segment individually first
342
- for segment in red_segments:
343
- segment_text = segment['text'].strip()
344
- if not segment_text:
345
- continue
346
-
347
- json_value = find_matching_json_value(segment_text, flat_json)
348
- if json_value is not None:
349
- replacement_text = get_value_as_string(json_value, segment_text)
350
- success = replace_single_segment(segment, replacement_text)
351
- if success:
352
- replacements_made += 1
353
- print(f" βœ… Fixed segment: '{segment_text[:30]}...'")
354
-
355
- # If no individual matches, try combined approach
356
- if replacements_made == 0 and red_segments:
357
- combined_text = " ".join(seg['text'] for seg in red_segments).strip()
358
- json_value = find_matching_json_value(combined_text, flat_json)
359
- if json_value is not None:
360
- replacement_text = get_value_as_string(json_value, combined_text)
361
- replacements_made = replace_all_red_segments(red_segments, replacement_text)
362
- print(f" βœ… Fixed combined text")
363
-
364
- return replacements_made
365
-
366
- # 🎯 SURGICAL FIX 2: Handle Operator Declaration table with context awareness
367
- def handle_operator_declaration_fix(table, flat_json):
368
- """SURGICAL FIX: Handle Operator Declaration Print Name and Position Title with better context detection"""
369
- replacements_made = 0
370
-
371
- # Build table context to understand what type of declaration this is
372
- table_context = ""
373
- for row in table.rows:
374
- for cell in row.cells:
375
- table_context += get_clean_text(cell).lower() + " "
376
-
377
- # Determine if this is an operator declaration vs auditor declaration
378
- is_operator_declaration = any(keyword in table_context for keyword in [
379
- "hereby acknowledge", "findings detailed", "management system",
380
- "accreditation to be shared", "operator signature"
381
- ])
382
-
383
- is_auditor_declaration = any(keyword in table_context for keyword in [
384
- "nhvas approved auditor", "auditor registration", "hereby certify",
385
- "auditor signature"
386
- ])
387
-
388
- # Process the table based on context
389
- for row_idx, row in enumerate(table.rows):
390
- if len(row.cells) >= 2:
391
- cell1_text = get_clean_text(row.cells[0]).strip()
392
- cell2_text = get_clean_text(row.cells[1]).strip()
393
-
394
- # Check if this is a header row with Print Name and Position Title
395
- if ("print name" in cell1_text.lower() and "position title" in cell2_text.lower() and
396
- len(table.rows) <= 4): # Small table only
397
-
398
- context_type = "Operator" if is_operator_declaration else ("Auditor" if is_auditor_declaration else "Unknown")
399
- print(f" 🎯 SURGICAL FIX: {context_type} Declaration table detected")
400
-
401
- # Look for the data row (should be next row)
402
- if row_idx + 1 < len(table.rows):
403
- data_row = table.rows[row_idx + 1]
404
- if len(data_row.cells) >= 2:
405
- name_cell = data_row.cells[0]
406
- position_cell = data_row.cells[1]
407
-
408
- # Fix Print Name based on context
409
- if has_red_text(name_cell):
410
- name_value = None
411
-
412
- if is_operator_declaration:
413
- # Try operator-specific fields first
414
- for field_attempt in ["Operator Declaration.Print Name", "operator.print name", "Print Name"]:
415
- name_value = find_matching_json_value(field_attempt, flat_json)
416
- if name_value is not None:
417
- break
418
- elif is_auditor_declaration:
419
- # Try auditor-specific fields first
420
- for field_attempt in ["NHVAS Approved Auditor Declaration.Print Name", "auditor name", "auditor", "Print Name"]:
421
- name_value = find_matching_json_value(field_attempt, flat_json)
422
- if name_value is not None:
423
- break
424
- else:
425
- # Fallback to generic
426
- name_value = find_matching_json_value("Print Name", flat_json)
427
-
428
- if name_value is not None:
429
- name_text = get_value_as_string(name_value)
430
- cell_replacements = replace_red_text_in_cell(name_cell, name_text)
431
- replacements_made += cell_replacements
432
- print(f" βœ… Fixed {context_type} Print Name: '{name_text}'")
433
-
434
- # Fix Position Title based on context
435
- if has_red_text(position_cell):
436
- position_value = None
437
-
438
- if is_operator_declaration:
439
- # Try operator-specific fields first
440
- for field_attempt in ["Operator Declaration.Position Title", "operator.position title", "Position Title"]:
441
- position_value = find_matching_json_value(field_attempt, flat_json)
442
- if position_value is not None:
443
- break
444
- elif is_auditor_declaration:
445
- # Try auditor registration number for auditor declarations
446
- for field_attempt in ["NHVR or Exemplar Global Auditor Registration Number", "auditor registration", "registration number"]:
447
- position_value = find_matching_json_value(field_attempt, flat_json)
448
- if position_value is not None:
449
- break
450
- else:
451
- # Fallback to generic
452
- position_value = find_matching_json_value("Position Title", flat_json)
453
-
454
- if position_value is not None:
455
- position_text = get_value_as_string(position_value)
456
- cell_replacements = replace_red_text_in_cell(position_cell, position_text)
457
- replacements_made += cell_replacements
458
- print(f" βœ… Fixed {context_type} Position/Registration: '{position_text}'")
459
-
460
- break # Found the table, stop looking
461
-
462
- return replacements_made
463
 
464
  def handle_australian_company_number(row, company_numbers):
465
- """Your original function (unchanged)"""
466
  replacements_made = 0
467
  for i, digit in enumerate(company_numbers):
468
  cell_idx = i + 1
@@ -475,7 +296,7 @@ def handle_australian_company_number(row, company_numbers):
475
  return replacements_made
476
 
477
  def handle_vehicle_registration_table(table, flat_json):
478
- """Your original function (unchanged)"""
479
  replacements_made = 0
480
 
481
  # Try to find vehicle registration data
@@ -617,122 +438,6 @@ def handle_vehicle_registration_table(table, flat_json):
617
 
618
  return replacements_made
619
 
620
- def handle_print_accreditation_section(table, flat_json):
621
- """Your original function (unchanged)"""
622
- replacements_made = 0
623
-
624
- print_data = flat_json.get("print accreditation name.print accreditation name", [])
625
- if not isinstance(print_data, list) or len(print_data) < 2:
626
- return 0
627
-
628
- name_value = print_data[0]
629
- position_value = print_data[1]
630
-
631
- print(f" πŸ“‹ Print accreditation data: Name='{name_value}', Position='{position_value}'")
632
-
633
- for row_idx, row in enumerate(table.rows):
634
- if len(row.cells) >= 2:
635
- cell1_text = get_clean_text(row.cells[0]).lower()
636
- cell2_text = get_clean_text(row.cells[1]).lower()
637
-
638
- if "print name" in cell1_text and "position title" in cell2_text:
639
- print(f" πŸ“ Found header row {row_idx + 1}: '{cell1_text}' | '{cell2_text}'")
640
-
641
- if row_idx + 1 < len(table.rows):
642
- data_row = table.rows[row_idx + 1]
643
- if len(data_row.cells) >= 2:
644
- if has_red_text(data_row.cells[0]):
645
- cell_replacements = replace_red_text_in_cell(data_row.cells[0], name_value)
646
- replacements_made += cell_replacements
647
- if cell_replacements > 0:
648
- print(f" βœ… Replaced Print Name: '{name_value}'")
649
-
650
- if has_red_text(data_row.cells[1]):
651
- cell_replacements = replace_red_text_in_cell(data_row.cells[1], position_value)
652
- replacements_made += cell_replacements
653
- if cell_replacements > 0:
654
- print(f" βœ… Replaced Position Title: '{position_value}'")
655
-
656
- break
657
-
658
- return replacements_made
659
-
660
- def process_single_column_sections(cell, field_name, flat_json):
661
- """Your original function (unchanged)"""
662
- json_value = find_matching_json_value(field_name, flat_json)
663
- if json_value is not None:
664
- replacement_text = get_value_as_string(json_value, field_name)
665
- if isinstance(json_value, list) and len(json_value) > 1:
666
- replacement_text = "\n".join(str(item) for item in json_value)
667
- if has_red_text(cell):
668
- print(f" βœ… Replacing red text in single-column section: '{field_name}'")
669
- print(f" βœ… Replacement text:\n{replacement_text}")
670
- cell_replacements = replace_red_text_in_cell(cell, replacement_text)
671
- if cell_replacements > 0:
672
- print(f" -> Replaced with: '{replacement_text[:100]}...'")
673
- return cell_replacements
674
- return 0
675
-
676
- def replace_red_text_with_line_breaks(cell, attendance_list):
677
- """Custom function to replace red text with properly formatted attendance list"""
678
- replacements_made = 0
679
-
680
- # Find all red text runs and their paragraphs
681
- red_runs = []
682
- target_paragraph = None
683
-
684
- for paragraph in cell.paragraphs:
685
- for run in paragraph.runs:
686
- if is_red(run) and run.text.strip():
687
- red_runs.append(run)
688
- if target_paragraph is None:
689
- target_paragraph = paragraph
690
-
691
- if not red_runs or not target_paragraph:
692
- return 0
693
-
694
- print(f" πŸ”§ Found {len(red_runs)} red runs to replace")
695
-
696
- # Clear all red text first
697
- for run in red_runs:
698
- run.text = ''
699
-
700
- # Add the first attendance item to the first red run
701
- if len(attendance_list) > 0 and red_runs:
702
- first_run = red_runs[0]
703
- first_run.text = str(attendance_list[0]).strip()
704
- first_run.font.color.rgb = RGBColor(0, 0, 0) # Make it black
705
- replacements_made += 1
706
- print(f" βœ… Added first item: '{attendance_list[0]}'")
707
-
708
- # Add remaining items with line breaks
709
- for item in attendance_list[1:]:
710
- item_text = str(item).strip()
711
- if item_text:
712
- try:
713
- # Method 1: Try to add line break and new run to the same paragraph
714
- from docx.oxml import OxmlElement
715
- br = OxmlElement('w:br')
716
- first_run._element.append(br)
717
-
718
- # Add the text to the same run after the line break
719
- first_run.text += item_text
720
- replacements_made += 1
721
- print(f" βœ… Added item with line break: '{item_text}'")
722
- except Exception as e:
723
- print(f" ⚠️ Line break method failed: {e}")
724
- # Fallback: try to add as new paragraph
725
- try:
726
- new_para = cell.add_paragraph()
727
- new_run = new_para.add_run(item_text)
728
- new_run.font.color.rgb = RGBColor(0, 0, 0)
729
- replacements_made += 1
730
- print(f" βœ… Added as new paragraph: '{item_text}'")
731
- except Exception as e2:
732
- print(f" ❌ Both methods failed: {e2}")
733
-
734
- return replacements_made
735
-
736
  def handle_attendance_list_table_enhanced(table, flat_json):
737
  """Enhanced Attendance List processing with better detection"""
738
  replacements_made = 0
@@ -746,7 +451,6 @@ def handle_attendance_list_table_enhanced(table, flat_json):
746
 
747
  # Scan all cells in the first few rows for attendance list indicators
748
  found_attendance_row = None
749
- found_attendance_cell = None
750
 
751
  for row_idx, row in enumerate(table.rows[:3]): # Check first 3 rows
752
  for cell_idx, cell in enumerate(row.cells):
@@ -755,7 +459,6 @@ def handle_attendance_list_table_enhanced(table, flat_json):
755
  # Check if this cell contains attendance list header
756
  if any(pattern in cell_text for pattern in attendance_patterns):
757
  found_attendance_row = row_idx
758
- found_attendance_cell = cell_idx
759
  print(f" 🎯 ENHANCED: Found Attendance List in row {row_idx + 1}, cell {cell_idx + 1}")
760
  break
761
 
@@ -765,7 +468,7 @@ def handle_attendance_list_table_enhanced(table, flat_json):
765
  if found_attendance_row is None:
766
  return 0
767
 
768
- # 🔧 FIX: Look for attendance data in JSON
769
  attendance_value = None
770
  attendance_search_keys = [
771
  "Attendance List (Names and Position Titles).Attendance List (Names and Position Titles)",
@@ -787,7 +490,7 @@ def handle_attendance_list_table_enhanced(table, flat_json):
787
  print(f" ❌ No attendance data found in JSON")
788
  return 0
789
 
790
- # 🔧 CRITICAL FIX: Look for red text in ALL cells of the table, not just the header
791
  target_cell = None
792
 
793
  print(f" πŸ” Scanning ALL cells in attendance table for red text...")
@@ -821,7 +524,7 @@ def handle_attendance_list_table_enhanced(table, flat_json):
821
  print(f" ⚠️ No red text found that looks like attendance data")
822
  return 0
823
 
824
- # 🔧 NEW FIX: Use custom function to handle line breaks properly
825
  if has_red_text(target_cell):
826
  print(f" πŸ”§ Replacing red text with properly formatted attendance list...")
827
 
@@ -835,125 +538,21 @@ def handle_attendance_list_table_enhanced(table, flat_json):
835
  for i, item in enumerate(attendance_list):
836
  print(f" {i+1}. {item}")
837
 
838
- # Use our custom function that handles line breaks properly
839
- cell_replacements = replace_red_text_with_line_breaks(target_cell, attendance_list)
 
840
  replacements_made += cell_replacements
841
 
842
- print(f" βœ… Added {len(attendance_list)} attendance items with proper line breaks")
843
  print(f" πŸ“Š Replacements made: {cell_replacements}")
844
 
845
  return replacements_made
846
 
847
- # 🎯 FINAL FIX 2: Generic Management Summary fix for ALL types (Mass, Fatigue, Maintenance)
848
- def handle_management_summary_fix(cell, flat_json):
849
- """FINAL FIX: Handle ANY Management Summary section (Mass/Fatigue/Maintenance) - RED TEXT ONLY"""
850
- if not has_red_text(cell):
851
- return 0
852
-
853
- # Check if this cell contains any Management Summary
854
- cell_text = get_clean_text(cell).lower()
855
-
856
- # Detect which type of management summary this is
857
- management_type = None
858
- if "mass management" in cell_text and "summary" in cell_text:
859
- management_type = "Mass Management"
860
- elif "fatigue management" in cell_text and "summary" in cell_text:
861
- management_type = "Fatigue Management"
862
- elif "maintenance management" in cell_text and "summary" in cell_text:
863
- management_type = "Maintenance Management"
864
-
865
- if not management_type:
866
- return 0
867
-
868
- print(f" 🎯 FINAL FIX: {management_type} Summary processing - RED TEXT ONLY")
869
-
870
- # ONLY process red text segments, not the entire cell text
871
- red_segments = extract_red_text_segments(cell)
872
- replacements_made = 0
873
-
874
- # Try to replace ONLY the red text segments
875
- for segment in red_segments:
876
- segment_text = segment['text'].strip()
877
- if not segment_text:
878
- continue
879
-
880
- print(f" πŸ” Processing red text segment: '{segment_text[:50]}...'")
881
-
882
- # Try multiple variations based on the management type
883
- summary_value = None
884
- field_attempts = [
885
- f"{management_type} Summary of Audit findings",
886
- f"{management_type} Summary",
887
- f"{management_type.lower()} summary",
888
- management_type.lower(),
889
- segment_text # Also try the exact red text
890
- ]
891
-
892
- # Also try variations without "Management"
893
- base_type = management_type.replace(" Management", "")
894
- field_attempts.extend([
895
- f"{base_type} Management Summary of Audit findings",
896
- f"{base_type} Summary of Audit findings",
897
- f"{base_type} Summary",
898
- f"{base_type.lower()} summary"
899
- ])
900
-
901
- for field_attempt in field_attempts:
902
- summary_value = find_matching_json_value(field_attempt, flat_json)
903
- if summary_value is not None:
904
- print(f" βœ… Found match with field: '{field_attempt}'")
905
- break
906
-
907
- if summary_value is not None:
908
- replacement_text = get_value_as_string(summary_value, segment_text)
909
- if isinstance(summary_value, list):
910
- replacement_text = "\n".join(str(item) for item in summary_value if str(item).strip())
911
-
912
- success = replace_single_segment(segment, replacement_text)
913
- if success:
914
- replacements_made += 1
915
- print(f" βœ… Fixed {management_type} Summary segment: '{segment_text[:30]}...' -> '{replacement_text[:30]}...'")
916
- else:
917
- print(f" ❌ No match found for red text: '{segment_text[:30]}...'")
918
-
919
- # If no individual segment matches, try combined approach on red text only
920
- if replacements_made == 0 and red_segments:
921
- combined_red_text = " ".join(seg['text'] for seg in red_segments).strip()
922
- print(f" πŸ”„ Trying combined red text match: '{combined_red_text[:50]}...'")
923
-
924
- # Try combined text matching with all field variations
925
- field_attempts = [
926
- f"{management_type} Summary of Audit findings",
927
- f"{management_type} Summary",
928
- f"{management_type.lower()} summary",
929
- combined_red_text
930
- ]
931
-
932
- base_type = management_type.replace(" Management", "")
933
- field_attempts.extend([
934
- f"{base_type} Management Summary of Audit findings",
935
- f"{base_type} Summary of Audit findings",
936
- f"{base_type} Summary"
937
- ])
938
-
939
- for field_attempt in field_attempts:
940
- summary_value = find_matching_json_value(field_attempt, flat_json)
941
- if summary_value is not None:
942
- replacement_text = get_value_as_string(summary_value, combined_red_text)
943
- if isinstance(summary_value, list):
944
- replacement_text = "\n".join(str(item) for item in summary_value if str(item).strip())
945
-
946
- replacements_made = replace_all_red_segments(red_segments, replacement_text)
947
- print(f" βœ… Fixed {management_type} Summary combined red text with field: '{field_attempt}'")
948
- break
949
-
950
- return replacements_made
951
-
952
  def fix_management_summary_details_column(table, flat_json):
953
  """Fix the DETAILS column in Management Summary table"""
954
  replacements_made = 0
955
 
956
- print(f" 🎯 FIX 1: Management Summary DETAILS column processing")
957
 
958
  # Check if this is a Management Summary table
959
  table_text = ""
@@ -979,7 +578,6 @@ def fix_management_summary_details_column(table, flat_json):
979
  if has_red_text(details_cell):
980
  print(f" πŸ” Found Std 5. Verification with red text")
981
 
982
- # Use the exact data from your JSON
983
  json_value = find_matching_json_value("Std 5. Verification", flat_json)
984
  if json_value is not None:
985
  replacement_text = get_value_as_string(json_value, "Std 5. Verification")
@@ -991,7 +589,6 @@ def fix_management_summary_details_column(table, flat_json):
991
  if has_red_text(details_cell):
992
  print(f" πŸ” Found Std 6. Internal Review with red text")
993
 
994
- # Use the exact data from your JSON
995
  json_value = find_matching_json_value("Std 6. Internal Review", flat_json)
996
  if json_value is not None:
997
  replacement_text = get_value_as_string(json_value, "Std 6. Internal Review")
@@ -999,7 +596,7 @@ def fix_management_summary_details_column(table, flat_json):
999
  replacements_made += cell_replacements
1000
  print(f" βœ… Replaced Std 6. Internal Review details")
1001
 
1002
- return replacements_made # βœ… This return is INSIDE the function
1003
 
1004
  def fix_operator_declaration_empty_values(table, flat_json):
1005
  """Fix Operator Declaration table when values are empty"""
 
3
  from docx.shared import RGBColor
4
  import re
5
 
6
+ # Heading patterns for document structure detection
7
  HEADING_PATTERNS = {
8
  "main": [
9
  r"NHVAS\s+Audit\s+Summary\s+Report",
 
27
  ]
28
  }
29
 
30
+ # ============================================================================
31
+ # UTILITY FUNCTIONS
32
+ # ============================================================================
33
+
34
  def load_json(filepath):
35
  with open(filepath, 'r') as file:
36
  return json.load(file)
 
64
  else:
65
  return str(value)
66
 
67
+ def get_clean_text(cell):
68
+ text = ""
69
+ for paragraph in cell.paragraphs:
70
+ for run in paragraph.runs:
71
+ text += run.text
72
+ return text.strip()
73
+
74
+ def has_red_text(cell):
75
+ for paragraph in cell.paragraphs:
76
+ for run in paragraph.runs:
77
+ if is_red(run) and run.text.strip():
78
+ return True
79
+ return False
80
+
81
+ def has_red_text_in_paragraph(paragraph):
82
+ for run in paragraph.runs:
83
+ if is_red(run) and run.text.strip():
84
+ return True
85
+ return False
86
+
87
+ # ============================================================================
88
+ # JSON MATCHING FUNCTIONS
89
+ # ============================================================================
90
+
91
  def find_matching_json_value(field_name, flat_json):
92
+ """Find matching value in JSON with multiple strategies"""
93
  field_name = field_name.strip()
94
 
95
  # Try exact match first
 
103
  print(f" βœ… Case-insensitive match found for key '{field_name}' with JSON key '{key}'")
104
  return value
105
 
106
+ # Better Print Name detection for operator vs auditor
107
  if field_name.lower().strip() == "print name":
 
108
  operator_keys = [k for k in flat_json.keys() if "operator" in k.lower() and "print name" in k.lower()]
109
  auditor_keys = [k for k in flat_json.keys() if "auditor" in k.lower() and ("print name" in k.lower() or "name" in k.lower())]
110
 
 
111
  if operator_keys:
112
  print(f" βœ… Operator Print Name match: '{field_name}' -> '{operator_keys[0]}'")
113
  return flat_json[operator_keys[0]]
 
169
  print(f" ❌ No match found for '{field_name}'")
170
  return None
171
 
172
+ # ============================================================================
173
+ # RED TEXT PROCESSING FUNCTIONS
174
+ # ============================================================================
 
 
 
 
 
 
 
 
 
 
175
 
176
  def extract_red_text_segments(cell):
177
+ """Extract red text segments from a cell"""
178
  red_segments = []
179
 
180
  for para_idx, paragraph in enumerate(cell.paragraphs):
 
207
 
208
  return red_segments
209
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  def replace_all_red_segments(red_segments, replacement_text):
211
+ """Replace all red segments with replacement text"""
212
  if not red_segments:
213
  return 0
214
 
 
241
 
242
  for line in replacement_lines[1:]:
243
  if line.strip():
244
+ from docx.oxml import OxmlElement
245
  br = OxmlElement('w:br')
246
  first_run.element.append(br)
247
 
 
256
  return replacements_made
257
 
258
  def replace_single_segment(segment, replacement_text):
259
+ """Replace a single red text segment"""
260
  if not segment['runs']:
261
  return False
262
 
 
269
 
270
  return True
271
 
272
+ def replace_red_text_in_cell(cell, replacement_text):
273
+ """Replace red text in a cell with replacement text"""
274
  red_segments = extract_red_text_segments(cell)
275
 
276
  if not red_segments:
277
  return 0
278
 
279
+ return replace_all_red_segments(red_segments, replacement_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
 
281
+ # ============================================================================
282
+ # SPECIALIZED TABLE HANDLERS
283
+ # ============================================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
 
285
  def handle_australian_company_number(row, company_numbers):
286
+ """Handle Australian Company Number digit placement"""
287
  replacements_made = 0
288
  for i, digit in enumerate(company_numbers):
289
  cell_idx = i + 1
 
296
  return replacements_made
297
 
298
  def handle_vehicle_registration_table(table, flat_json):
299
+ """Handle vehicle registration table data replacement"""
300
  replacements_made = 0
301
 
302
  # Try to find vehicle registration data
 
438
 
439
  return replacements_made
440
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
441
  def handle_attendance_list_table_enhanced(table, flat_json):
442
  """Enhanced Attendance List processing with better detection"""
443
  replacements_made = 0
 
451
 
452
  # Scan all cells in the first few rows for attendance list indicators
453
  found_attendance_row = None
 
454
 
455
  for row_idx, row in enumerate(table.rows[:3]): # Check first 3 rows
456
  for cell_idx, cell in enumerate(row.cells):
 
459
  # Check if this cell contains attendance list header
460
  if any(pattern in cell_text for pattern in attendance_patterns):
461
  found_attendance_row = row_idx
 
462
  print(f" 🎯 ENHANCED: Found Attendance List in row {row_idx + 1}, cell {cell_idx + 1}")
463
  break
464
 
 
468
  if found_attendance_row is None:
469
  return 0
470
 
471
+ # Look for attendance data in JSON
472
  attendance_value = None
473
  attendance_search_keys = [
474
  "Attendance List (Names and Position Titles).Attendance List (Names and Position Titles)",
 
490
  print(f" ❌ No attendance data found in JSON")
491
  return 0
492
 
493
+ # Look for red text in ALL cells of the table
494
  target_cell = None
495
 
496
  print(f" πŸ” Scanning ALL cells in attendance table for red text...")
 
524
  print(f" ⚠️ No red text found that looks like attendance data")
525
  return 0
526
 
527
+ # Replace red text with properly formatted attendance list
528
  if has_red_text(target_cell):
529
  print(f" πŸ”§ Replacing red text with properly formatted attendance list...")
530
 
 
538
  for i, item in enumerate(attendance_list):
539
  print(f" {i+1}. {item}")
540
 
541
+ # Replace with line-separated attendance list
542
+ replacement_text = "\n".join(attendance_list)
543
+ cell_replacements = replace_red_text_in_cell(target_cell, replacement_text)
544
  replacements_made += cell_replacements
545
 
546
+ print(f" βœ… Added {len(attendance_list)} attendance items")
547
  print(f" πŸ“Š Replacements made: {cell_replacements}")
548
 
549
  return replacements_made
550
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
551
  def fix_management_summary_details_column(table, flat_json):
552
  """Fix the DETAILS column in Management Summary table"""
553
  replacements_made = 0
554
 
555
+ print(f" 🎯 FIX: Management Summary DETAILS column processing")
556
 
557
  # Check if this is a Management Summary table
558
  table_text = ""
 
578
  if has_red_text(details_cell):
579
  print(f" πŸ” Found Std 5. Verification with red text")
580
 
 
581
  json_value = find_matching_json_value("Std 5. Verification", flat_json)
582
  if json_value is not None:
583
  replacement_text = get_value_as_string(json_value, "Std 5. Verification")
 
589
  if has_red_text(details_cell):
590
  print(f" πŸ” Found Std 6. Internal Review with red text")
591
 
 
592
  json_value = find_matching_json_value("Std 6. Internal Review", flat_json)
593
  if json_value is not None:
594
  replacement_text = get_value_as_string(json_value, "Std 6. Internal Review")
 
596
  replacements_made += cell_replacements
597
  print(f" βœ… Replaced Std 6. Internal Review details")
598
 
599
+ return replacements_made
600
 
601
  def fix_operator_declaration_empty_values(table, flat_json):
602
  """Fix Operator Declaration table when values are empty"""