Spaces:

Shami96
/

PDF-Data_Extractor

Running

App Files Community

Shami96 committed on Aug 21

Commit

b93e8d9

verified ·

1 Parent(s): 758c040

Update updated_word.py

Browse files

Files changed (1) hide show

updated_word.py +55 -458

updated_word.py CHANGED Viewed

@@ -3,7 +3,7 @@ from docx import Document
 from docx.shared import RGBColor
 import re
-# Your original heading patterns (unchanged)
 HEADING_PATTERNS = {
     "main": [
         r"NHVAS\s+Audit\s+Summary\s+Report",
@@ -27,6 +27,10 @@ HEADING_PATTERNS = {
     ]
 }
 def load_json(filepath):
     with open(filepath, 'r') as file:
         return json.load(file)
@@ -60,8 +64,32 @@ def get_value_as_string(value, field_name=""):
     else:
         return str(value)
 def find_matching_json_value(field_name, flat_json):
-    """Your original matching function with minimal improvements"""
     field_name = field_name.strip()
     # Try exact match first
@@ -75,13 +103,11 @@ def find_matching_json_value(field_name, flat_json):
             print(f"    ✅ Case-insensitive match found for key '{field_name}' with JSON key '{key}'")
             return value
-    # 🎯 MINIMAL IMPROVEMENT: Better Print Name detection for operator vs auditor
     if field_name.lower().strip() == "print name":
-        # Look in the flat_json keys to see what context we're in
         operator_keys = [k for k in flat_json.keys() if "operator" in k.lower() and "print name" in k.lower()]
         auditor_keys = [k for k in flat_json.keys() if "auditor" in k.lower() and ("print name" in k.lower() or "name" in k.lower())]
-        # If we have operator-specific keys, prefer those in operator context
         if operator_keys:
             print(f"    ✅ Operator Print Name match: '{field_name}' -> '{operator_keys[0]}'")
             return flat_json[operator_keys[0]]
@@ -143,22 +169,12 @@ def find_matching_json_value(field_name, flat_json):
     print(f"    ❌ No match found for '{field_name}'")
     return None
-def get_clean_text(cell):
-    text = ""
-    for paragraph in cell.paragraphs:
-        for run in paragraph.runs:
-            text += run.text
-    return text.strip()
-def has_red_text(cell):
-    for paragraph in cell.paragraphs:
-        for run in paragraph.runs:
-            if is_red(run) and run.text.strip():
-                return True
-    return False
 def extract_red_text_segments(cell):
-    """Your original red text extraction (unchanged)"""
     red_segments = []
     for para_idx, paragraph in enumerate(cell.paragraphs):
@@ -191,27 +207,8 @@ def extract_red_text_segments(cell):
     return red_segments
-def replace_red_text_in_cell(cell, replacement_text):
-    """Your original replacement function (unchanged)"""
-    red_segments = extract_red_text_segments(cell)
-    if not red_segments:
-        return 0
-    if len(red_segments) > 1:
-        replacements_made = 0
-        for segment in red_segments:
-            segment_text = segment['text'].strip()
-            if segment_text:
-                pass
-        if replacements_made == 0:
-            return replace_all_red_segments(red_segments, replacement_text)
-    return replace_all_red_segments(red_segments, replacement_text)
 def replace_all_red_segments(red_segments, replacement_text):
-    """Your original function (unchanged)"""
     if not red_segments:
         return 0
@@ -244,7 +241,7 @@ def replace_all_red_segments(red_segments, replacement_text):
             for line in replacement_lines[1:]:
                 if line.strip():
-                    from docx.oxml import OxmlElement, ns
                     br = OxmlElement('w:br')
                     first_run.element.append(br)
@@ -259,7 +256,7 @@ def replace_all_red_segments(red_segments, replacement_text):
     return replacements_made
 def replace_single_segment(segment, replacement_text):
-    """Your original function (unchanged)"""
     if not segment['runs']:
         return False
@@ -272,197 +269,21 @@ def replace_single_segment(segment, replacement_text):
     return True
-def handle_multiple_red_segments_in_cell(cell, flat_json):
-    """Your original function (unchanged)"""
     red_segments = extract_red_text_segments(cell)
     if not red_segments:
         return 0
-    print(f"      🔍 Found {len(red_segments)} red text segments in cell")
-    replacements_made = 0
-    unmatched_segments = []
-    for i, segment in enumerate(red_segments):
-        segment_text = segment['text'].strip()
-        if not segment_text:
-            continue
-        print(f"        Segment {i+1}: '{segment_text[:50]}...'")
-        json_value = find_matching_json_value(segment_text, flat_json)
-        if json_value is not None:
-            replacement_text = get_value_as_string(json_value, segment_text)
-            if isinstance(json_value, list) and len(json_value) > 1:
-                replacement_text = "\n".join(str(item) for item in json_value if str(item).strip())
-            success = replace_single_segment(segment, replacement_text)
-            if success:
-                replacements_made += 1
-                print(f"        ✅ Replaced segment '{segment_text[:30]}...' with '{replacement_text[:30]}...'")
-        else:
-            unmatched_segments.append(segment)
-            print(f"        ⏳ No individual match for segment '{segment_text[:30]}...'")
-    if unmatched_segments and replacements_made == 0:
-        combined_text = " ".join(seg['text'] for seg in red_segments).strip()
-        print(f"      🔄 Trying combined text match: '{combined_text[:50]}...'")
-        json_value = find_matching_json_value(combined_text, flat_json)
-        if json_value is not None:
-            replacement_text = get_value_as_string(json_value, combined_text)
-            if isinstance(json_value, list) and len(json_value) > 1:
-                replacement_text = "\n".join(str(item) for item in json_value if str(item).strip())
-            replacements_made = replace_all_red_segments(red_segments, replacement_text)
-            print(f"      ✅ Replaced combined text with '{replacement_text[:50]}...'")
-    return replacements_made
-# 🎯 SURGICAL FIX 1: Handle Nature of Business multi-line red text
-def handle_nature_business_multiline_fix(cell, flat_json):
-    """SURGICAL FIX: Handle multi-line red text in Nature of Business cells"""
-    if not has_red_text(cell):
-        return 0
-    # Check if this cell contains "Nature of the Operators Business"
-    cell_text = get_clean_text(cell).lower()
-    if "nature of the operators business" not in cell_text and "nature of the operator business" not in cell_text:
-        return 0
-    print(f"    🎯 SURGICAL FIX: Nature of Business multi-line processing")
-    # Look for sub-fields like "Accreditation Number:" and "Expiry Date:"
-    red_segments = extract_red_text_segments(cell)
-    replacements_made = 0
-    # Try to replace each segment individually first
-    for segment in red_segments:
-        segment_text = segment['text'].strip()
-        if not segment_text:
-            continue
-        json_value = find_matching_json_value(segment_text, flat_json)
-        if json_value is not None:
-            replacement_text = get_value_as_string(json_value, segment_text)
-            success = replace_single_segment(segment, replacement_text)
-            if success:
-                replacements_made += 1
-                print(f"        ✅ Fixed segment: '{segment_text[:30]}...'")
-    # If no individual matches, try combined approach
-    if replacements_made == 0 and red_segments:
-        combined_text = " ".join(seg['text'] for seg in red_segments).strip()
-        json_value = find_matching_json_value(combined_text, flat_json)
-        if json_value is not None:
-            replacement_text = get_value_as_string(json_value, combined_text)
-            replacements_made = replace_all_red_segments(red_segments, replacement_text)
-            print(f"        ✅ Fixed combined text")
-    return replacements_made
-# 🎯 SURGICAL FIX 2: Handle Operator Declaration table with context awareness
-def handle_operator_declaration_fix(table, flat_json):
-    """SURGICAL FIX: Handle Operator Declaration Print Name and Position Title with better context detection"""
-    replacements_made = 0
-    # Build table context to understand what type of declaration this is
-    table_context = ""
-    for row in table.rows:
-        for cell in row.cells:
-            table_context += get_clean_text(cell).lower() + " "
-    # Determine if this is an operator declaration vs auditor declaration
-    is_operator_declaration = any(keyword in table_context for keyword in [
-        "hereby acknowledge", "findings detailed", "management system",
-        "accreditation to be shared", "operator signature"
-    ])
-    is_auditor_declaration = any(keyword in table_context for keyword in [
-        "nhvas approved auditor", "auditor registration", "hereby certify",
-        "auditor signature"
-    ])
-    # Process the table based on context
-    for row_idx, row in enumerate(table.rows):
-        if len(row.cells) >= 2:
-            cell1_text = get_clean_text(row.cells[0]).strip()
-            cell2_text = get_clean_text(row.cells[1]).strip()
-            # Check if this is a header row with Print Name and Position Title
-            if ("print name" in cell1_text.lower() and "position title" in cell2_text.lower() and
-                len(table.rows) <= 4):  # Small table only
-                context_type = "Operator" if is_operator_declaration else ("Auditor" if is_auditor_declaration else "Unknown")
-                print(f"    🎯 SURGICAL FIX: {context_type} Declaration table detected")
-                # Look for the data row (should be next row)
-                if row_idx + 1 < len(table.rows):
-                    data_row = table.rows[row_idx + 1]
-                    if len(data_row.cells) >= 2:
-                        name_cell = data_row.cells[0]
-                        position_cell = data_row.cells[1]
-                        # Fix Print Name based on context
-                        if has_red_text(name_cell):
-                            name_value = None
-                            if is_operator_declaration:
-                                # Try operator-specific fields first
-                                for field_attempt in ["Operator Declaration.Print Name", "operator.print name", "Print Name"]:
-                                    name_value = find_matching_json_value(field_attempt, flat_json)
-                                    if name_value is not None:
-                                        break
-                            elif is_auditor_declaration:
-                                # Try auditor-specific fields first
-                                for field_attempt in ["NHVAS Approved Auditor Declaration.Print Name", "auditor name", "auditor", "Print Name"]:
-                                    name_value = find_matching_json_value(field_attempt, flat_json)
-                                    if name_value is not None:
-                                        break
-                            else:
-                                # Fallback to generic
-                                name_value = find_matching_json_value("Print Name", flat_json)
-                            if name_value is not None:
-                                name_text = get_value_as_string(name_value)
-                                cell_replacements = replace_red_text_in_cell(name_cell, name_text)
-                                replacements_made += cell_replacements
-                                print(f"        ✅ Fixed {context_type} Print Name: '{name_text}'")
-                        # Fix Position Title based on context
-                        if has_red_text(position_cell):
-                            position_value = None
-                            if is_operator_declaration:
-                                # Try operator-specific fields first
-                                for field_attempt in ["Operator Declaration.Position Title", "operator.position title", "Position Title"]:
-                                    position_value = find_matching_json_value(field_attempt, flat_json)
-                                    if position_value is not None:
-                                        break
-                            elif is_auditor_declaration:
-                                # Try auditor registration number for auditor declarations
-                                for field_attempt in ["NHVR or Exemplar Global Auditor Registration Number", "auditor registration", "registration number"]:
-                                    position_value = find_matching_json_value(field_attempt, flat_json)
-                                    if position_value is not None:
-                                        break
-                            else:
-                                # Fallback to generic
-                                position_value = find_matching_json_value("Position Title", flat_json)
-                            if position_value is not None:
-                                position_text = get_value_as_string(position_value)
-                                cell_replacements = replace_red_text_in_cell(position_cell, position_text)
-                                replacements_made += cell_replacements
-                                print(f"        ✅ Fixed {context_type} Position/Registration: '{position_text}'")
-                break  # Found the table, stop looking
-    return replacements_made
 def handle_australian_company_number(row, company_numbers):
-    """Your original function (unchanged)"""
     replacements_made = 0
     for i, digit in enumerate(company_numbers):
         cell_idx = i + 1
@@ -475,7 +296,7 @@ def handle_australian_company_number(row, company_numbers):
     return replacements_made
 def handle_vehicle_registration_table(table, flat_json):
-    """Your original function (unchanged)"""
     replacements_made = 0
     # Try to find vehicle registration data
@@ -617,122 +438,6 @@ def handle_vehicle_registration_table(table, flat_json):
     return replacements_made
-def handle_print_accreditation_section(table, flat_json):
-    """Your original function (unchanged)"""
-    replacements_made = 0
-    print_data = flat_json.get("print accreditation name.print accreditation name", [])
-    if not isinstance(print_data, list) or len(print_data) < 2:
-        return 0
-    name_value = print_data[0]
-    position_value = print_data[1]
-    print(f"    📋 Print accreditation data: Name='{name_value}', Position='{position_value}'")
-    for row_idx, row in enumerate(table.rows):
-        if len(row.cells) >= 2:
-            cell1_text = get_clean_text(row.cells[0]).lower()
-            cell2_text = get_clean_text(row.cells[1]).lower()
-            if "print name" in cell1_text and "position title" in cell2_text:
-                print(f"    📍 Found header row {row_idx + 1}: '{cell1_text}' | '{cell2_text}'")
-                if row_idx + 1 < len(table.rows):
-                    data_row = table.rows[row_idx + 1]
-                    if len(data_row.cells) >= 2:
-                        if has_red_text(data_row.cells[0]):
-                            cell_replacements = replace_red_text_in_cell(data_row.cells[0], name_value)
-                            replacements_made += cell_replacements
-                            if cell_replacements > 0:
-                                print(f"      ✅ Replaced Print Name: '{name_value}'")
-                        if has_red_text(data_row.cells[1]):
-                            cell_replacements = replace_red_text_in_cell(data_row.cells[1], position_value)
-                            replacements_made += cell_replacements
-                            if cell_replacements > 0:
-                                print(f"      ✅ Replaced Position Title: '{position_value}'")
-                break
-    return replacements_made
-def process_single_column_sections(cell, field_name, flat_json):
-    """Your original function (unchanged)"""
-    json_value = find_matching_json_value(field_name, flat_json)
-    if json_value is not None:
-        replacement_text = get_value_as_string(json_value, field_name)
-        if isinstance(json_value, list) and len(json_value) > 1:
-            replacement_text = "\n".join(str(item) for item in json_value)
-        if has_red_text(cell):
-            print(f"    ✅ Replacing red text in single-column section: '{field_name}'")
-            print(f"    ✅ Replacement text:\n{replacement_text}")
-            cell_replacements = replace_red_text_in_cell(cell, replacement_text)
-            if cell_replacements > 0:
-                print(f"    -> Replaced with: '{replacement_text[:100]}...'")
-                return cell_replacements
-    return 0
-def replace_red_text_with_line_breaks(cell, attendance_list):
-    """Custom function to replace red text with properly formatted attendance list"""
-    replacements_made = 0
-    # Find all red text runs and their paragraphs
-    red_runs = []
-    target_paragraph = None
-    for paragraph in cell.paragraphs:
-        for run in paragraph.runs:
-            if is_red(run) and run.text.strip():
-                red_runs.append(run)
-                if target_paragraph is None:
-                    target_paragraph = paragraph
-    if not red_runs or not target_paragraph:
-        return 0
-    print(f"    🔧 Found {len(red_runs)} red runs to replace")
-    # Clear all red text first
-    for run in red_runs:
-        run.text = ''
-    # Add the first attendance item to the first red run
-    if len(attendance_list) > 0 and red_runs:
-        first_run = red_runs[0]
-        first_run.text = str(attendance_list[0]).strip()
-        first_run.font.color.rgb = RGBColor(0, 0, 0)  # Make it black
-        replacements_made += 1
-        print(f"    ✅ Added first item: '{attendance_list[0]}'")
-        # Add remaining items with line breaks
-        for item in attendance_list[1:]:
-            item_text = str(item).strip()
-            if item_text:
-                try:
-                    # Method 1: Try to add line break and new run to the same paragraph
-                    from docx.oxml import OxmlElement
-                    br = OxmlElement('w:br')
-                    first_run._element.append(br)
-                    # Add the text to the same run after the line break
-                    first_run.text += item_text
-                    replacements_made += 1
-                    print(f"    ✅ Added item with line break: '{item_text}'")
-                except Exception as e:
-                    print(f"    ⚠️ Line break method failed: {e}")
-                    # Fallback: try to add as new paragraph
-                    try:
-                        new_para = cell.add_paragraph()
-                        new_run = new_para.add_run(item_text)
-                        new_run.font.color.rgb = RGBColor(0, 0, 0)
-                        replacements_made += 1
-                        print(f"    ✅ Added as new paragraph: '{item_text}'")
-                    except Exception as e2:
-                        print(f"    ❌ Both methods failed: {e2}")
-    return replacements_made
 def handle_attendance_list_table_enhanced(table, flat_json):
     """Enhanced Attendance List processing with better detection"""
     replacements_made = 0
@@ -746,7 +451,6 @@ def handle_attendance_list_table_enhanced(table, flat_json):
     # Scan all cells in the first few rows for attendance list indicators
     found_attendance_row = None
-    found_attendance_cell = None
     for row_idx, row in enumerate(table.rows[:3]):  # Check first 3 rows
         for cell_idx, cell in enumerate(row.cells):
@@ -755,7 +459,6 @@ def handle_attendance_list_table_enhanced(table, flat_json):
             # Check if this cell contains attendance list header
             if any(pattern in cell_text for pattern in attendance_patterns):
                 found_attendance_row = row_idx
-                found_attendance_cell = cell_idx
                 print(f"    🎯 ENHANCED: Found Attendance List in row {row_idx + 1}, cell {cell_idx + 1}")
                 break
@@ -765,7 +468,7 @@ def handle_attendance_list_table_enhanced(table, flat_json):
     if found_attendance_row is None:
         return 0
-    # 🔧 FIX: Look for attendance data in JSON
     attendance_value = None
     attendance_search_keys = [
         "Attendance List (Names and Position Titles).Attendance List (Names and Position Titles)",
@@ -787,7 +490,7 @@ def handle_attendance_list_table_enhanced(table, flat_json):
         print(f"    ❌ No attendance data found in JSON")
         return 0
-    # 🔧 CRITICAL FIX: Look for red text in ALL cells of the table, not just the header
     target_cell = None
     print(f"    🔍 Scanning ALL cells in attendance table for red text...")
@@ -821,7 +524,7 @@ def handle_attendance_list_table_enhanced(table, flat_json):
         print(f"    ⚠️ No red text found that looks like attendance data")
         return 0
-    # 🔧 NEW FIX: Use custom function to handle line breaks properly
     if has_red_text(target_cell):
         print(f"    🔧 Replacing red text with properly formatted attendance list...")
@@ -835,125 +538,21 @@ def handle_attendance_list_table_enhanced(table, flat_json):
         for i, item in enumerate(attendance_list):
             print(f"        {i+1}. {item}")
-        # Use our custom function that handles line breaks properly
-        cell_replacements = replace_red_text_with_line_breaks(target_cell, attendance_list)
         replacements_made += cell_replacements
-        print(f"    ✅ Added {len(attendance_list)} attendance items with proper line breaks")
         print(f"    📊 Replacements made: {cell_replacements}")
     return replacements_made
-# 🎯 FINAL FIX 2: Generic Management Summary fix for ALL types (Mass, Fatigue, Maintenance)
-def handle_management_summary_fix(cell, flat_json):
-    """FINAL FIX: Handle ANY Management Summary section (Mass/Fatigue/Maintenance) - RED TEXT ONLY"""
-    if not has_red_text(cell):
-        return 0
-    # Check if this cell contains any Management Summary
-    cell_text = get_clean_text(cell).lower()
-    # Detect which type of management summary this is
-    management_type = None
-    if "mass management" in cell_text and "summary" in cell_text:
-        management_type = "Mass Management"
-    elif "fatigue management" in cell_text and "summary" in cell_text:
-        management_type = "Fatigue Management"
-    elif "maintenance management" in cell_text and "summary" in cell_text:
-        management_type = "Maintenance Management"
-    if not management_type:
-        return 0
-    print(f"    🎯 FINAL FIX: {management_type} Summary processing - RED TEXT ONLY")
-    # ONLY process red text segments, not the entire cell text
-    red_segments = extract_red_text_segments(cell)
-    replacements_made = 0
-    # Try to replace ONLY the red text segments
-    for segment in red_segments:
-        segment_text = segment['text'].strip()
-        if not segment_text:
-            continue
-        print(f"        🔍 Processing red text segment: '{segment_text[:50]}...'")
-        # Try multiple variations based on the management type
-        summary_value = None
-        field_attempts = [
-            f"{management_type} Summary of Audit findings",
-            f"{management_type} Summary",
-            f"{management_type.lower()} summary",
-            management_type.lower(),
-            segment_text  # Also try the exact red text
-        ]
-        # Also try variations without "Management"
-        base_type = management_type.replace(" Management", "")
-        field_attempts.extend([
-            f"{base_type} Management Summary of Audit findings",
-            f"{base_type} Summary of Audit findings",
-            f"{base_type} Summary",
-            f"{base_type.lower()} summary"
-        ])
-        for field_attempt in field_attempts:
-            summary_value = find_matching_json_value(field_attempt, flat_json)
-            if summary_value is not None:
-                print(f"        ✅ Found match with field: '{field_attempt}'")
-                break
-        if summary_value is not None:
-            replacement_text = get_value_as_string(summary_value, segment_text)
-            if isinstance(summary_value, list):
-                replacement_text = "\n".join(str(item) for item in summary_value if str(item).strip())
-            success = replace_single_segment(segment, replacement_text)
-            if success:
-                replacements_made += 1
-                print(f"        ✅ Fixed {management_type} Summary segment: '{segment_text[:30]}...' -> '{replacement_text[:30]}...'")
-        else:
-            print(f"        ❌ No match found for red text: '{segment_text[:30]}...'")
-    # If no individual segment matches, try combined approach on red text only
-    if replacements_made == 0 and red_segments:
-        combined_red_text = " ".join(seg['text'] for seg in red_segments).strip()
-        print(f"        🔄 Trying combined red text match: '{combined_red_text[:50]}...'")
-        # Try combined text matching with all field variations
-        field_attempts = [
-            f"{management_type} Summary of Audit findings",
-            f"{management_type} Summary",
-            f"{management_type.lower()} summary",
-            combined_red_text
-        ]
-        base_type = management_type.replace(" Management", "")
-        field_attempts.extend([
-            f"{base_type} Management Summary of Audit findings",
-            f"{base_type} Summary of Audit findings",
-            f"{base_type} Summary"
-        ])
-        for field_attempt in field_attempts:
-            summary_value = find_matching_json_value(field_attempt, flat_json)
-            if summary_value is not None:
-                replacement_text = get_value_as_string(summary_value, combined_red_text)
-                if isinstance(summary_value, list):
-                    replacement_text = "\n".join(str(item) for item in summary_value if str(item).strip())
-                replacements_made = replace_all_red_segments(red_segments, replacement_text)
-                print(f"        ✅ Fixed {management_type} Summary combined red text with field: '{field_attempt}'")
-                break
-    return replacements_made
 def fix_management_summary_details_column(table, flat_json):
     """Fix the DETAILS column in Management Summary table"""
     replacements_made = 0
-    print(f"    🎯 FIX 1: Management Summary DETAILS column processing")
     # Check if this is a Management Summary table
     table_text = ""
@@ -979,7 +578,6 @@ def fix_management_summary_details_column(table, flat_json):
                 if has_red_text(details_cell):
                     print(f"      🔍 Found Std 5. Verification with red text")
-                    # Use the exact data from your JSON
                     json_value = find_matching_json_value("Std 5. Verification", flat_json)
                     if json_value is not None:
                         replacement_text = get_value_as_string(json_value, "Std 5. Verification")
@@ -991,7 +589,6 @@ def fix_management_summary_details_column(table, flat_json):
                 if has_red_text(details_cell):
                     print(f"      🔍 Found Std 6. Internal Review with red text")
-                    # Use the exact data from your JSON
                     json_value = find_matching_json_value("Std 6. Internal Review", flat_json)
                     if json_value is not None:
                         replacement_text = get_value_as_string(json_value, "Std 6. Internal Review")
@@ -999,7 +596,7 @@ def fix_management_summary_details_column(table, flat_json):
                         replacements_made += cell_replacements
                         print(f"      ✅ Replaced Std 6. Internal Review details")
-    return replacements_made  # ✅ This return is INSIDE the function
 def fix_operator_declaration_empty_values(table, flat_json):
     """Fix Operator Declaration table when values are empty"""

 from docx.shared import RGBColor
 import re
+# Heading patterns for document structure detection
 HEADING_PATTERNS = {
     "main": [
         r"NHVAS\s+Audit\s+Summary\s+Report",
     ]
 }
+# ============================================================================
+# UTILITY FUNCTIONS
+# ============================================================================
 def load_json(filepath):
     with open(filepath, 'r') as file:
         return json.load(file)
     else:
         return str(value)
+def get_clean_text(cell):
+    text = ""
+    for paragraph in cell.paragraphs:
+        for run in paragraph.runs:
+            text += run.text
+    return text.strip()
+def has_red_text(cell):
+    for paragraph in cell.paragraphs:
+        for run in paragraph.runs:
+            if is_red(run) and run.text.strip():
+                return True
+    return False
+def has_red_text_in_paragraph(paragraph):
+    for run in paragraph.runs:
+        if is_red(run) and run.text.strip():
+            return True
+    return False
+# ============================================================================
+# JSON MATCHING FUNCTIONS
+# ============================================================================
 def find_matching_json_value(field_name, flat_json):
+    """Find matching value in JSON with multiple strategies"""
     field_name = field_name.strip()
     # Try exact match first
             print(f"    ✅ Case-insensitive match found for key '{field_name}' with JSON key '{key}'")
             return value
+    # Better Print Name detection for operator vs auditor
     if field_name.lower().strip() == "print name":
         operator_keys = [k for k in flat_json.keys() if "operator" in k.lower() and "print name" in k.lower()]
         auditor_keys = [k for k in flat_json.keys() if "auditor" in k.lower() and ("print name" in k.lower() or "name" in k.lower())]
         if operator_keys:
             print(f"    ✅ Operator Print Name match: '{field_name}' -> '{operator_keys[0]}'")
             return flat_json[operator_keys[0]]
     print(f"    ❌ No match found for '{field_name}'")
     return None
+# ============================================================================
+# RED TEXT PROCESSING FUNCTIONS
+# ============================================================================
 def extract_red_text_segments(cell):
+    """Extract red text segments from a cell"""
     red_segments = []
     for para_idx, paragraph in enumerate(cell.paragraphs):
     return red_segments
 def replace_all_red_segments(red_segments, replacement_text):
+    """Replace all red segments with replacement text"""
     if not red_segments:
         return 0
             for line in replacement_lines[1:]:
                 if line.strip():
+                    from docx.oxml import OxmlElement
                     br = OxmlElement('w:br')
                     first_run.element.append(br)
     return replacements_made
 def replace_single_segment(segment, replacement_text):
+    """Replace a single red text segment"""
     if not segment['runs']:
         return False
     return True
+def replace_red_text_in_cell(cell, replacement_text):
+    """Replace red text in a cell with replacement text"""
     red_segments = extract_red_text_segments(cell)
     if not red_segments:
         return 0
+    return replace_all_red_segments(red_segments, replacement_text)
+# ============================================================================
+# SPECIALIZED TABLE HANDLERS
+# ============================================================================
 def handle_australian_company_number(row, company_numbers):
+    """Handle Australian Company Number digit placement"""
     replacements_made = 0
     for i, digit in enumerate(company_numbers):
         cell_idx = i + 1
     return replacements_made
 def handle_vehicle_registration_table(table, flat_json):
+    """Handle vehicle registration table data replacement"""
     replacements_made = 0
     # Try to find vehicle registration data
     return replacements_made
 def handle_attendance_list_table_enhanced(table, flat_json):
     """Enhanced Attendance List processing with better detection"""
     replacements_made = 0
     # Scan all cells in the first few rows for attendance list indicators
     found_attendance_row = None
     for row_idx, row in enumerate(table.rows[:3]):  # Check first 3 rows
         for cell_idx, cell in enumerate(row.cells):
             # Check if this cell contains attendance list header
             if any(pattern in cell_text for pattern in attendance_patterns):
                 found_attendance_row = row_idx
                 print(f"    🎯 ENHANCED: Found Attendance List in row {row_idx + 1}, cell {cell_idx + 1}")
                 break
     if found_attendance_row is None:
         return 0
+    # Look for attendance data in JSON
     attendance_value = None
     attendance_search_keys = [
         "Attendance List (Names and Position Titles).Attendance List (Names and Position Titles)",
         print(f"    ❌ No attendance data found in JSON")
         return 0
+    # Look for red text in ALL cells of the table
     target_cell = None
     print(f"    🔍 Scanning ALL cells in attendance table for red text...")
         print(f"    ⚠️ No red text found that looks like attendance data")
         return 0
+    # Replace red text with properly formatted attendance list
     if has_red_text(target_cell):
         print(f"    🔧 Replacing red text with properly formatted attendance list...")
         for i, item in enumerate(attendance_list):
             print(f"        {i+1}. {item}")
+        # Replace with line-separated attendance list
+        replacement_text = "\n".join(attendance_list)
+        cell_replacements = replace_red_text_in_cell(target_cell, replacement_text)
         replacements_made += cell_replacements
+        print(f"    ✅ Added {len(attendance_list)} attendance items")
         print(f"    📊 Replacements made: {cell_replacements}")
     return replacements_made
 def fix_management_summary_details_column(table, flat_json):
     """Fix the DETAILS column in Management Summary table"""
     replacements_made = 0
+    print(f"    🎯 FIX: Management Summary DETAILS column processing")
     # Check if this is a Management Summary table
     table_text = ""
                 if has_red_text(details_cell):
                     print(f"      🔍 Found Std 5. Verification with red text")
                     json_value = find_matching_json_value("Std 5. Verification", flat_json)
                     if json_value is not None:
                         replacement_text = get_value_as_string(json_value, "Std 5. Verification")
                 if has_red_text(details_cell):
                     print(f"      🔍 Found Std 6. Internal Review with red text")
                     json_value = find_matching_json_value("Std 6. Internal Review", flat_json)
                     if json_value is not None:
                         replacement_text = get_value_as_string(json_value, "Std 6. Internal Review")
                         replacements_made += cell_replacements
                         print(f"      ✅ Replaced Std 6. Internal Review details")
+    return replacements_made
 def fix_operator_declaration_empty_values(table, flat_json):
     """Fix Operator Declaration table when values are empty"""