Shami96 committed on
Commit
8b6ed83
·
verified ·
1 Parent(s): 3d0e65d

Upload master_key.py

Browse files
Files changed (1) hide show
  1. master_key.py +354 -0
master_key.py ADDED
@@ -0,0 +1,354 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
Improved Master Key for NHVAS Audit extraction:
- TABLE_SCHEMAS: Enhanced definitions with better matching criteria
- HEADING_PATTERNS: Improved regex patterns for main/sub headings
- PARAGRAPH_PATTERNS: Enhanced patterns for key narrative sections
"""
7
+
8
# 1. Enhanced table schemas with better matching logic
#
# Maps a table name to the description used to recognise that table in an
# NHVAS audit document. Fields that appear in the entries below:
#   "headings"           - optional list of {"level": int, "text": str} dicts.
#   "orientation"        - "left" or "row1"; presumably whether the labels sit
#                          in the left-hand column or in the first row
#                          (TODO confirm against the consuming code).
#   "labels"             - label strings expected in the table. Duplicates
#                          (e.g. "expiry Date:", "Date") are kept exactly as
#                          they were in the original definition.
#   "columns"            - optional list of column header strings.
#   "split_labels"       - optional list of extra label strings.
#   "priority"           - integer; presumably a higher value wins when several
#                          schemas match (TODO confirm).
#   "context_keywords"   - optional list of lowercase keyword strings.
#   "context_exclusions" - optional list of strings; presumably disqualifies a
#                          match when present nearby (TODO confirm).
TABLE_SCHEMAS = {
    "Tick as appropriate": {
        "headings": [
            {"level": 1, "text": "NHVAS Audit Summary Report"},
        ],
        "orientation": "left",
        "labels": [
            "Mass",
            "Entry Audit",
            "Maintenance",
            "Initial Compliance Audit",
            "Basic Fatigue",
            "Compliance Audit",
            "Advanced Fatigue",
            "Spot Check",
            "Triggered Audit",
        ],
        "priority": 90,  # High priority for direct match
    },
    "Audit Information": {
        "orientation": "left",
        "labels": [
            "Date of Audit",
            "Location of audit",
            "Auditor name",
            "Audit Matrix Identifier (Name or Number)",
            "Auditor Exemplar Global Reg No.",
            "expiry Date:",
            "NHVR Auditor Registration Number",
            "expiry Date:",
        ],
        "priority": 80,
    },
    "Operator Information": {
        "headings": [
            {"level": 1, "text": "Operator Information"},
        ],
        "orientation": "left",
        "labels": [
            "Operator name (Legal entity)",
            "NHVAS Accreditation No. (If applicable)",
            "Registered trading name/s",
            "Australian Company Number",
            "NHVAS Manual (Policies and Procedures) developed by",
        ],
        "priority": 85,
    },
    "Operator contact details": {
        "orientation": "left",
        "labels": [
            "Operator business address",
            "Operator Postal address",
            "Email address",
            "Operator Telephone Number",
        ],
        "priority": 75,
        "context_keywords": ["contact", "address", "email", "telephone"],
    },
    "Attendance List (Names and Position Titles)": {
        "headings": [
            {"level": 1, "text": "NHVAS Audit Summary Report"},
        ],
        "orientation": "row1",
        "labels": ["Attendance List (Names and Position Titles)"],
        "priority": 90,
    },
    "Nature of the Operators Business (Summary)": {
        "orientation": "row1",
        "labels": ["Nature of the Operators Business (Summary):"],
        "split_labels": ["Accreditation Number:", "Expiry Date:"],
        "priority": 85,
    },
    "Accreditation Vehicle Summary": {
        "orientation": "left",
        "labels": ["Number of powered vehicles", "Number of trailing vehicles"],
        "priority": 80,
    },
    "Accreditation Driver Summary": {
        "orientation": "left",
        "labels": ["Number of drivers in BFM", "Number of drivers in AFM"],
        "priority": 80,
    },
    "Compliance Codes": {
        "orientation": "left",
        "labels": ["V", "NC", "TNC", "SFI", "NAP", "NA"],
        "priority": 70,
        "context_exclusions": [
            "MASS MANAGEMENT",
            "MAINTENANCE MANAGEMENT",
            "FATIGUE MANAGEMENT",
        ],
    },
    "Corrective Action Request Identification": {
        "orientation": "row1",
        "labels": ["Title", "Abbreviation", "Description"],
        "priority": 80,
    },
    "Maintenance Management": {
        "headings": [
            {"level": 1, "text": "NHVAS AUDIT SUMMARY REPORT"},
        ],
        "orientation": "left",
        "labels": [
            "Std 1. Daily Check",
            "Std 2. Fault Recording and Reporting",
            "Std 3. Fault Repair",
            "Std 4. Maintenance Schedules and Methods",
            "Std 5. Records and Documentation",
            "Std 6. Responsibilities",
            "Std 7. Internal Review",
            "Std 8. Training and Education",
        ],
        "priority": 60,
        "context_keywords": ["maintenance"],
    },
    "Mass Management": {
        "headings": [
            {"level": 1, "text": "NHVAS AUDIT SUMMARY REPORT"},
        ],
        "orientation": "left",
        "labels": [
            "Std 1. Responsibilities",
            "Std 2. Vehicle Control",
            "Std 3. Vehicle Use",
            "Std 4. Records and Documentation",
            "Std 5. Verification",
            "Std 6. Internal Review",
            "Std 7. Training and Education",
            "Std 8. Maintenance of Suspension",
        ],
        "priority": 60,
        "context_keywords": ["mass"],
    },
    "Fatigue Management": {
        "headings": [
            {"level": 1, "text": "NHVAS AUDIT SUMMARY REPORT"},
        ],
        "orientation": "left",
        "labels": [
            "Std 1. Scheduling and Rostering",
            "Std 2. Health and wellbeing for performed duty",
            "Std 3. Training and Education",
            "Std 4. Responsibilities and management practices",
            "Std 5. Internal Review",
            "Std 6. Records and Documentation",
            "Std 7. Workplace conditions",
        ],
        "priority": 60,
        "context_keywords": ["fatigue"],
    },
    "Maintenance Management Summary": {
        "headings": [
            {"level": 1, "text": "Audit Observations and Comments"},
            {"level": 2, "text": "Maintenance Management Summary of Audit findings"},
        ],
        "orientation": "left",
        "columns": ["MAINTENANCE MANAGEMENT", "DETAILS"],
        "labels": [
            "Std 1. Daily Check",
            "Std 2. Fault Recording and Reporting",
            "Std 3. Fault Repair",
            "Std 4. Maintenance Schedules and Methods",
            "Std 5. Records and Documentation",
            "Std 6. Responsibilities",
            "Std 7. Internal Review",
            "Std 8. Training and Education",
        ],
        "priority": 70,
    },
    "Mass Management Summary": {
        "headings": [
            {"level": 1, "text": "Mass Management Summary of Audit findings"},
        ],
        "orientation": "left",
        "columns": ["MASS MANAGEMENT", "DETAILS"],
        "labels": [
            "Std 1. Responsibilities",
            "Std 2. Vehicle Control",
            "Std 3. Vehicle Use",
            "Std 4. Records and Documentation",
            "Std 5. Verification",
            "Std 6. Internal Review",
            "Std 7. Training and Education",
            "Std 8. Maintenance of Suspension",
        ],
        "priority": 70,
    },
    "Fatigue Management Summary": {
        "headings": [
            {"level": 1, "text": "Fatigue Management Summary of Audit findings"},
        ],
        "orientation": "left",
        "columns": ["FATIGUE MANAGEMENT", "DETAILS"],
        "labels": [
            "Std 1. Scheduling and Rostering",
            "Std 2. Health and wellbeing for performed duty",
            "Std 3. Training and Education",
            "Std 4. Responsibilities and management practices",
            "Std 5. Internal Review",
            "Std 6. Records and Documentation",
            "Std 7. Workplace conditions",
        ],
        "priority": 70,
    },
    "Vehicle Registration Numbers Maintenance": {
        "headings": [
            {"level": 1, "text": "Vehicle Registration Numbers of Records Examined"},
            {"level": 2, "text": "Maintenance Management"},
        ],
        "orientation": "row1",
        "labels": [
            "No.",
            "Registration Number",
            "Roadworthiness Certificates",
            "Maintenance Records",
            "Daily Checks",
            "Fault Recording/ Reporting",
            "Fault Repair",
        ],
        "priority": 80,
        "context_keywords": ["maintenance", "vehicle registration"],
    },
    "Vehicle Registration Numbers Mass": {
        "headings": [
            {"level": 1, "text": "Vehicle Registration Numbers of Records Examined"},
            {"level": 2, "text": "MASS MANAGEMENT"},
        ],
        "orientation": "row1",
        "labels": [
            "No.",
            "Registration Number",
            "Sub contractor",
            "Sub-contracted Vehicles Statement of Compliance",
            "Weight Verification Records",
            "RFS Suspension Certification #",
            "Suspension System Maintenance",
            "Trip Records",
            "Fault Recording/ Reporting on Suspension System",
        ],
        "priority": 80,
        "context_keywords": ["mass", "vehicle registration"],
    },
    "Driver / Scheduler Records Examined": {
        "headings": [
            {"level": 1, "text": "Driver / Scheduler Records Examined"},
            {"level": 2, "text": "FATIGUE MANAGEMENT"},
        ],
        "orientation": "row1",
        "labels": [
            "No.",
            "Driver / Scheduler Name",
            "Driver TLIF Course # Completed",
            "Scheduler TLIF Course # Completed",
            "Medical Certificates (Current Yes/No) Date of expiry",
            "Roster / Schedule / Safe Driving Plan (Date Range)",
            "Fit for Duty Statement Completed (Yes/No)",
            "Work Diary Pages (Page Numbers) Electronic Work Diary Records (Date Range)",
        ],
        "priority": 80,
        "context_keywords": ["driver", "scheduler", "fatigue"],
    },
    "Operator's Name (legal entity)": {
        "headings": [
            {"level": 1, "text": "CORRECTIVE ACTION REQUEST (CAR)"},
        ],
        "orientation": "left",
        "labels": ["Operator's Name (legal entity)"],
        "priority": 85,
    },
    "Non-conformance and CAR details": {
        "orientation": "left",
        "labels": [
            "Non-conformance agreed close out date",
            "Module and Standard",
            "Corrective Action Request (CAR) Number",
            "Observed Non-conformance:",
            "Corrective Action taken or to be taken by operator:",
            "Operator or Representative Signature",
            "Position",
            "Date",
            "Comments:",
            "Auditor signature",
            "Date",
        ],
        "priority": 75,
        "context_keywords": ["non-conformance", "corrective action"],
    },
    "NHVAS Approved Auditor Declaration": {
        "headings": [
            {"level": 1, "text": "NHVAS APPROVED AUDITOR DECLARATION"},
        ],
        "orientation": "row1",
        "labels": [
            "Print Name",
            "NHVR or Exemplar Global Auditor Registration Number",
        ],
        "priority": 90,
        "context_exclusions": ["manager", "operator declaration"],
    },
    "Audit Declaration dates": {
        "headings": [
            {"level": 1, "text": "Audit Declaration dates"},
        ],
        "orientation": "left",
        "labels": [
            "Audit was conducted on",
            "Unconditional CARs closed out on:",
            "Conditional CARs to be closed out by:",
        ],
        "priority": 80,
    },
    "Print accreditation name": {
        "headings": [
            {"level": 1, "text": "(print accreditation name)"},
        ],
        "orientation": "left",
        "labels": ["(print accreditation name)"],
        "priority": 85,
    },
    "Operator Declaration": {
        "headings": [
            {"level": 1, "text": "Operator Declaration"},
        ],
        "orientation": "row1",
        "labels": ["Print Name", "Position Title"],
        "priority": 90,
        "context_keywords": ["operator declaration", "manager"],
        "context_exclusions": ["auditor", "nhvas approved"],
    },
}
325
+
326
# 2. Enhanced heading detection patterns
#
# Regular-expression source strings grouped under "main" (report title
# variants) and "sub" (section headings). Whitespace between words is matched
# with \s+, so a heading may contain any run of whitespace between words.
#
# NOTE(review): the patterns carry no inline flags and mix upper-case and
# title-case spellings, so matching is case-sensitive unless the caller
# compiles them with re.IGNORECASE - confirm against the consuming code.
# NOTE(review): r"MAINTENANCE\s+MANAGEMENT" and r"MASS\s+MANAGEMENT" are
# prefixes of the longer "...SUMMARY OF AUDIT FINDINGS" patterns listed after
# them; if the caller stops at the first pattern that matches, the longer
# patterns will never be the one reported. Order is left unchanged here.
HEADING_PATTERNS = {
    "main": [
        r"NHVAS\s+Audit\s+Summary\s+Report",
        r"NATIONAL\s+HEAVY\s+VEHICLE\s+ACCREDITATION\s+AUDIT\s+SUMMARY\s+REPORT",
        r"NHVAS\s+AUDIT\s+SUMMARY\s+REPORT",
    ],
    "sub": [
        r"AUDIT\s+OBSERVATIONS\s+AND\s+COMMENTS",
        r"MAINTENANCE\s+MANAGEMENT",
        r"MASS\s+MANAGEMENT",
        r"Fatigue\s+Management\s+Summary\s+of\s+Audit\s+findings",
        r"MAINTENANCE\s+MANAGEMENT\s+SUMMARY\s+OF\s+AUDIT\s+FINDINGS",
        r"MASS\s+MANAGEMENT\s+SUMMARY\s+OF\s+AUDIT\s+FINDINGS",
        r"Vehicle\s+Registration\s+Numbers\s+of\s+Records\s+Examined",
        r"CORRECTIVE\s+ACTION\s+REQUEST\s+\(CAR\)",
        r"NHVAS\s+APPROVED\s+AUDITOR\s+DECLARATION",
        r"Operator\s+Declaration",
        r"Operator\s+Information",
    ],
}
347
+
348
# 3. Enhanced paragraph patterns for key narrative sections
#
# Maps a paragraph kind to the regular-expression source string that
# identifies it. No inline flags are set, so the strings are case-sensitive
# unless the caller supplies flags when compiling them.
PARAGRAPH_PATTERNS = {
    # The literal instruction sentence, including its final full stop.
    "findings_summary": r"Provide a summary of findings based on the evidence gathered during the audit\.",
    # Fixed opening words followed by anything up to the end of the line.
    "declaration_text": r"I hereby acknowledge and agree with the findings.*",
    "introductory_note": r"This audit assesses the.*",
    # Either a whole line such as "12th March 2023" (ordinal suffix optional)
    # or a line consisting of exactly the word "Date".
    "date_line": r"^\s*\d{1,2}(?:st|nd|rd|th)?\s+[A-Za-z]+\s+\d{4}\s*$|^Date$",
}