anicolson committed on
Commit
ae934ba
1 Parent(s): 4a744a8

Upload model

Browse files
Files changed (5) hide show
  1. README.md +3 -3
  2. config.json +4 -0
  3. create_section_files.py +150 -0
  4. modelling_cxrmate_ed.py +33 -9
  5. section_parser.py +281 -0
README.md CHANGED
@@ -1,4 +1,7 @@
1
  ---
 
 
 
2
  library_name: transformers
3
  tags:
4
  - chest X-ray report generation
@@ -16,9 +19,6 @@ tags:
16
  - patient records
17
  - mimic-cxr
18
  - mimic-iv-ed
19
- license: apache-2.0
20
- language:
21
- - en
22
  ---
23
 
24
  # CXRMate-ED: The Impact of Auxiliary Patient Data on Automated Chest X-Ray Report Generation and How to Incorporate It
 
1
  ---
2
+ language:
3
+ - en
4
+ license: apache-2.0
5
  library_name: transformers
6
  tags:
7
  - chest X-ray report generation
 
19
  - patient records
20
  - mimic-cxr
21
  - mimic-iv-ed
 
 
 
22
  ---
23
 
24
  # CXRMate-ED: The Impact of Auxiliary Patient Data on Automated Chest X-Ray Report Generation and How to Incorporate It
config.json CHANGED
@@ -85,6 +85,10 @@
85
  "rms_norm_eps": 1e-06,
86
  "rope_scaling": null,
87
  "rope_theta": 10000.0,
 
 
 
 
88
  "sep_token_id": null,
89
  "suppress_tokens": null,
90
  "task_specific_params": null,
 
85
  "rms_norm_eps": 1e-06,
86
  "rope_scaling": null,
87
  "rope_theta": 10000.0,
88
+ "section_ids": [
89
+ 12,
90
+ 13
91
+ ],
92
  "sep_token_id": null,
93
  "suppress_tokens": null,
94
  "task_specific_params": null,
create_section_files.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ import os
3
+ from pathlib import Path
4
+
5
+ from tqdm import tqdm
6
+
7
+ # local folder import
8
+ from .section_parser import custom_mimic_cxr_rules, section_text
9
+
10
+
11
def list_rindex(l, s):
    """Return the index of the *last* occurrence of ``s`` in ``l``.

    Source: https://github.com/MIT-LCP/mimic-cxr/blob/master/txt/create_section_files.py

    Raises:
        ValueError: if ``s`` does not occur in ``l``.
    """
    # Find the first match in the reversed list, then map it back
    # to an index in the original orientation.
    return len(l) - 1 - l[::-1].index(s)
18
+
19
+
20
def create_section_files(reports_path, output_path, no_split):
    """Section every MIMIC-CXR free-text report and write the results to CSV.

    Modification of:
    https://github.com/MIT-LCP/mimic-cxr/blob/master/txt/create_section_files.py

    Args:
        reports_path: root of the MIMIC-CXR ``files`` directory (contains the
            three-character group folders ``p10`` .. ``p19``).
        output_path: directory for the output CSVs; created if missing.
        no_split: if True, write every report's conclusion section to a single
            ``mimic_cxr_sections.csv``; otherwise shard into ~10k-row files.
    """
    reports_path = Path(reports_path)
    output_path = Path(output_path)

    if not output_path.exists():
        output_path.mkdir()

    # Not all reports can be automatically sectioned: these dictionaries
    # hold manually determined section names / character ranges.
    custom_section_names, custom_indices = custom_mimic_cxr_rules()

    # Get all higher-up folders (p10, p11, etc.).
    p_grp_folders = os.listdir(reports_path)
    p_grp_folders = [p for p in p_grp_folders
                     if p.startswith('p') and len(p) == 3]
    p_grp_folders.sort()

    # patient_studies will hold the text for use in NLP labeling.
    patient_studies = []

    # study_sections has an element per study: a list with the text of each
    # prioritized section (or None where the section is absent).
    study_sections = []
    for p_grp in p_grp_folders:
        # Get patient folders, usually around ~6k per group folder.
        cxr_path = reports_path / p_grp
        p_folders = os.listdir(cxr_path)
        p_folders = [p for p in p_folders if p.startswith('p')]
        p_folders.sort()

        # For each patient in this grouping folder.
        print(p_grp)
        for p in tqdm(p_folders):
            patient_path = cxr_path / p

            # Get the filenames of all their free-text reports.
            studies = os.listdir(patient_path)
            studies = [s for s in studies
                       if s.endswith('.txt') and s.startswith('s')]

            for s in studies:
                # Load in the free-text report.
                with open(patient_path / s, 'r') as fp:
                    text = fp.read()

                # Study string name without the .txt extension.
                s_stem = s[0:-4]

                # Custom rules for some poorly formatted reports.
                if s_stem in custom_indices:
                    idx = custom_indices[s_stem]
                    patient_studies.append([s_stem, text[idx[0]:idx[1]]])
                    continue

                # Split text into sections (start indices are unused here).
                sections, section_names, _ = section_text(text)

                # Check to see if this has mis-named sections,
                # e.g. sometimes the impression is in the comparison section.
                if s_stem in custom_section_names:
                    sn = custom_section_names[s_stem]
                    idx = list_rindex(section_names, sn)
                    patient_studies.append([s_stem, sections[idx].strip()])
                    continue

                # Grab the *last* section with the given title;
                # prioritizes impression > findings, etc.
                #
                # "last_paragraph" is text up to the end of the report:
                # many reports are simple and have a single section header
                # followed by a few paragraphs, which are grouped into the
                # section "last_paragraph".
                #
                # Comparison seems unusual, but if no other sections exist
                # the radiologist has usually written the report in the
                # comparison section.
                idx = -1
                for sn in ('impression', 'findings', 'indication', 'history', 'last_paragraph', 'comparison'):
                    if sn in section_names:
                        idx = list_rindex(section_names, sn)
                        break

                if idx == -1:
                    # We didn't find any sections we can use :(
                    patient_studies.append([s_stem, ''])
                    print(f'no impression/findings: {patient_path / s}')
                else:
                    # Store the text of the conclusion section.
                    patient_studies.append([s_stem, sections[idx].strip()])

                    study_sectioned = [s_stem]
                    for sn in ('impression', 'findings', 'indication', 'history', 'last_paragraph', 'comparison'):
                        if sn in section_names:
                            idx = list_rindex(section_names, sn)
                            study_sectioned.append(sections[idx].strip())
                        else:
                            study_sectioned.append(None)
                    study_sections.append(study_sectioned)

    # Write distinct files to facilitate modular processing.
    if len(patient_studies) > 0:
        # newline='' per the csv module docs: csv.writer supplies its own
        # line terminators, otherwise blank rows appear on Windows.
        with open(output_path / 'mimic_cxr_sectioned.csv', 'w', newline='') as fp:
            csvwriter = csv.writer(fp)
            # Write header.
            csvwriter.writerow(['study', 'impression', 'findings', 'indication', 'history', 'last_paragraph', 'comparison'])
            csvwriter.writerows(study_sections)

        if no_split:
            # Write all the reports out to a single file.
            with open(output_path / 'mimic_cxr_sections.csv', 'w', newline='') as fp:
                csv.writer(fp).writerows(patient_studies)
        else:
            # Write ~22 files with ~10k reports each.
            jmp = 10000
            for n in range(0, len(patient_studies), jmp):
                with open(output_path / f'mimic_cxr_{n // jmp:02d}.csv', 'w', newline='') as fp:
                    csv.writer(fp).writerows(patient_studies[n:n + jmp])
150
+
modelling_cxrmate_ed.py CHANGED
@@ -1,12 +1,8 @@
1
- import csv
2
- import functools
3
  import math
4
  import os
5
- import re
6
- from collections import OrderedDict
7
  from glob import glob
8
  from pathlib import Path
9
- from typing import Dict, List, Optional, Tuple, Union
10
 
11
  import duckdb
12
  import pandas as pd
@@ -24,10 +20,11 @@ from transformers.models.vision_encoder_decoder.configuration_vision_encoder_dec
24
  )
25
  from transformers.utils import logging
26
 
 
27
  from .dataset import StudyIDEDStayIDSubset
28
  from .modelling_uniformer import MultiUniFormerWithProjectionHead
29
  from .records import EDCXRSubjectRecords
30
- from .tables import ed_module_tables
31
 
32
  logger = logging.get_logger(__name__)
33
 
@@ -940,7 +937,7 @@ class MIMICIVEDCXRMultimodalModel(VisionEncoderDecoderModel):
940
  "Please download them using wget -r -N -c -np --reject dcm --user <username> --ask-password https://physionet.org/files/mimic-cxr/2.0.0/"""
941
 
942
  print('Extracting sections from reports...')
943
- create_sectioned_files(
944
  reports_path=os.path.join(physionet_dir, 'mimic-cxr', '2.0.0', 'files'),
945
  output_path=sectioned_dir,
946
  no_split=True,
@@ -1009,8 +1006,8 @@ class MIMICIVEDCXRMultimodalModel(VisionEncoderDecoderModel):
1009
 
1010
  connect.sql(f"CREATE OR REPLACE TABLE mimic_cxr AS SELECT * FROM df")
1011
 
1012
- # Create lookup tables (do this only for ED tables, as the MIMIC-CXR metadata table is not useful):
1013
- for k, v in ed_module_tables.items():
1014
  if v.load and v.index_columns:
1015
  start_idx = 0
1016
  for i in v.index_columns_source:
@@ -1127,3 +1124,30 @@ class MIMICIVEDCXRMultimodalModel(VisionEncoderDecoderModel):
1127
  f'No. of training dicom_ids, study_ids, & subject_ids: {dataset.num_dicom_ids},',
1128
  f'{dataset.num_study_ids}, & {dataset.num_subject_ids}.',
1129
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import math
2
  import os
 
 
3
  from glob import glob
4
  from pathlib import Path
5
+ from typing import Optional, Tuple, Union
6
 
7
  import duckdb
8
  import pandas as pd
 
20
  )
21
  from transformers.utils import logging
22
 
23
+ from .create_section_files import create_section_files
24
  from .dataset import StudyIDEDStayIDSubset
25
  from .modelling_uniformer import MultiUniFormerWithProjectionHead
26
  from .records import EDCXRSubjectRecords
27
+ from .tables import ed_module_tables, mimic_cxr_tables
28
 
29
  logger = logging.get_logger(__name__)
30
 
 
937
  "Please download them using wget -r -N -c -np --reject dcm --user <username> --ask-password https://physionet.org/files/mimic-cxr/2.0.0/"""
938
 
939
  print('Extracting sections from reports...')
940
+ create_section_files(
941
  reports_path=os.path.join(physionet_dir, 'mimic-cxr', '2.0.0', 'files'),
942
  output_path=sectioned_dir,
943
  no_split=True,
 
1006
 
1007
  connect.sql(f"CREATE OR REPLACE TABLE mimic_cxr AS SELECT * FROM df")
1008
 
1009
+ # Create lookup tables:
1010
+ for k, v in (ed_module_tables | mimic_cxr_tables).items():
1011
  if v.load and v.index_columns:
1012
  start_idx = 0
1013
  for i in v.index_columns_source:
 
1124
  f'No. of training dicom_ids, study_ids, & subject_ids: {dataset.num_dicom_ids},',
1125
  f'{dataset.num_study_ids}, & {dataset.num_subject_ids}.',
1126
  )
1127
+ return dataset
1128
+
1129
    @staticmethod
    def collate_fn(batch):
        """Collate a list of per-example feature dicts into one padded batch dict.

        Args:
            batch: list of dicts mapping feature-name keys to values; keys may
                differ between examples (missing keys are filled with None).
                NOTE(review): assumes every example carries an 'images' entry
                that pad_sequence can stack — confirm against the dataset.

        Returns:
            dict mapping each key to a batch-first list/tensor; for keys
            containing 'index_value_feats' it also pads the matching
            '*_token_type_ids' entries and derives '*_mask' entries.
        """
        # Union of all keys present in any example. setdefault below mutates
        # the incoming dicts in place, inserting None for absent keys.
        keys = set().union(*(d.keys() for d in batch))
        batch = {j: [i.setdefault(j, None) for i in batch] for j in keys}
        # Pad variable-length image stacks to a common length with zeros.
        batch['images'] = torch.nn.utils.rnn.pad_sequence(batch['images'], batch_first=True, padding_value=0.0)

        for k in keys:
            if 'index_value_feats' in k:

                # Feature width taken from the first example that has this key;
                # examples lacking it get an empty (0, width) placeholder.
                total_indices = next(i for i in batch[k] if i is not None).shape[-1]
                batch[k] = [i if i is not None else torch.empty(0, total_indices) for i in batch[k]]
                batch[k] = torch.nn.utils.rnn.pad_sequence(batch[k], batch_first=True, padding_value=-1)  # Pad value of -1 is not ideal. Need to use something else.
                token_type_id_name = k.replace('_feats', '_token_type_ids')
                batch[token_type_id_name] = [i if i is not None else torch.empty(0, dtype=torch.long) for i in batch[token_type_id_name]]
                batch[token_type_id_name] = torch.nn.utils.rnn.pad_sequence(
                    batch[token_type_id_name], batch_first=True, padding_value=0,
                )
                # Rows that are entirely -1 padding get mask 0; real rows get 1.
                mask_name = k.replace('_feats', '_mask')
                batch[mask_name] = (batch[k] != -1).any(dim=-1).int()

            if 'time_delta' in k and 'index_value' in k:
                batch[k] = [i if i is not None else torch.empty(0, 1) for i in batch[k]]
                batch[k] = torch.nn.utils.rnn.pad_sequence(batch[k], batch_first=True, padding_value=0)

        return batch
section_parser.py ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
+
4
def section_text(text):
    """Split a radiology report into sections keyed by its ALL-CAPS headers.

    Source: https://github.com/MIT-LCP/mimic-cxr/blob/master/LICENSE

    Assumes text in a radiology-report format, e.g.::

        COMPARISON: Chest radiograph dated XYZ.

        IMPRESSION: ABC...

    Returns:
        tuple of three parallel lists:
            sections - the text of each section
            section_names - normalized section names
            section_idx - start index of each section's text in ``text``
    """
    header_re = re.compile(
        r'\n ([A-Z ()/,-]+):\s', re.DOTALL)

    sections = []
    section_names = []
    section_idx = []

    match = header_re.search(text, 0)

    if match is None:
        # No recognizable headers: treat the whole report as one section.
        sections.append(text)
        section_names.append('full report')
        section_idx.append(0)
    else:
        # Everything before the first header is the preamble.
        sections.append(text[0:match.start(1)])
        section_names.append('preamble')
        section_idx.append(0)

        while match is not None:
            header = match.group(1).lower()
            # Section body starts right after the header match.
            body_start = match.end()
            # Skip past the first newline to avoid some bad parses.
            newline_offset = text[body_start:].find('\n')
            if newline_offset == -1:
                newline_offset = 0

            match = header_re.search(text, body_start + newline_offset)

            body_end = len(text) if match is None else match.start()

            sections.append(text[body_start:body_end])
            section_names.append(header)
            section_idx.append(body_start)

    section_names = normalize_section_names(section_names)

    # Remove empty impression/findings sections.
    # This handles reports starting with a finding-like statement that is
    # really a report title rather than a section,
    # e.g. p10/p10103318/s57408307:
    #   CHEST, PA LATERAL:
    #
    #   INDICATION: This is the actual section ....
    # It also helps when there are multiple findings sections,
    # usually one of which is empty.
    for i in reversed(range(len(section_names))):
        if section_names[i] in ('impression', 'findings') and sections[i].strip() == '':
            del sections[i]
            del section_names[i]
            del section_idx[i]

    if ('impression' not in section_names) and ('findings' not in section_names):
        # Create a new section for the final paragraph.
        if '\n \n' in sections[-1]:
            paragraphs = sections[-1].split('\n \n')
            sections[-1] = paragraphs[0]
            sections.append('\n \n'.join(paragraphs[1:]))
            section_names.append('last_paragraph')
            section_idx.append(section_idx[-1] + len(sections[-2]))

    return sections, section_names, section_idx
92
+
93
+
94
def normalize_section_names(section_names):
    """Map raw report section headers onto a canonical section vocabulary.

    Source: https://github.com/MIT-LCP/mimic-cxr/blob/master/LICENSE

    Resolution order per name: exact match in the frequent-header table
    (which also fixes common typos), then substring match against the main
    section names, then view-description phrases that imply 'findings'.
    Unrecognized names pass through unchanged (lower-cased and stripped).
    """
    # First, lower-case and strip every name.
    names = [s.lower().strip() for s in section_names]

    frequent_sections = {
        "preamble": "preamble",  # 227885
        "impression": "impression",  # 187759
        "comparison": "comparison",  # 154647
        "indication": "indication",  # 153730
        "findings": "findings",  # 149842
        "examination": "examination",  # 94094
        "technique": "technique",  # 81402
        "history": "history",  # 45624
        "comparisons": "comparison",  # 8686
        "clinical history": "history",  # 7121
        "reason for examination": "indication",  # 5845
        "notification": "notification",  # 5749
        "reason for exam": "indication",  # 4430
        "clinical information": "history",  # 4024
        "exam": "examination",  # 3907
        "clinical indication": "indication",  # 1945
        "conclusion": "impression",  # 1802
        "chest, two views": "findings",  # 1735
        "recommendation(s)": "recommendations",  # 1700
        "type of examination": "examination",  # 1678
        "reference exam": "comparison",  # 347
        "patient history": "history",  # 251
        "addendum": "addendum",  # 183
        "comparison exam": "comparison",  # 163
        "date": "date",  # 108
        "comment": "comment",  # 88
        "findings and impression": "impression",  # 87
        "wet read": "wet read",  # 83
        "comparison film": "comparison",  # 79
        "recommendations": "recommendations",  # 72
        "findings/impression": "impression",  # 47
        "pfi": "history",
        'recommendation': 'recommendations',
        'wetread': 'wet read',
        'ndication': 'impression',  # 1
        'impresson': 'impression',  # 2
        'imprression': 'impression',  # 1
        'imoression': 'impression',  # 1
        'impressoin': 'impression',  # 1
        'imprssion': 'impression',  # 1
        'impresion': 'impression',  # 1
        'imperssion': 'impression',  # 1
        'mpression': 'impression',  # 1
        'impession': 'impression',  # 3
        'findings/ impression': 'impression',  # ,1
        'finding': 'findings',  # ,8
        'findins': 'findings',
        'findindgs': 'findings',  # ,1
        'findgings': 'findings',  # ,1
        'findngs': 'findings',  # ,1
        'findnings': 'findings',  # ,1
        'finidngs': 'findings',  # ,2
        'idication': 'indication',  # ,1
        'reference findings': 'findings',  # ,1
        'comparision': 'comparison',  # ,2
        'comparsion': 'comparison',  # ,1
        'comparrison': 'comparison',  # ,1
        'comparisions': 'comparison'  # ,1
    }

    # Phrases describing a radiographic view; a header containing one of
    # these describes the entire study, which is equivalent to findings.
    view_phrases = [
        'chest',
        'portable',
        'pa and lateral',
        'lateral and pa',
        'ap and lateral',
        'lateral and ap',
        'frontal and',
        'two views',
        'frontal view',
        'pa view',
        'ap view',
        'one view',
        'lateral view',
        'bone window',
        'frontal upright',
        'frontal semi-upright',
        'ribs',
        'pa and lat'
    ]
    p_findings = re.compile('({})'.format('|'.join(view_phrases)))

    main_sections = [
        'impression', 'findings', 'history', 'comparison',
        'addendum'
    ]

    for i, raw in enumerate(names):
        canonical = frequent_sections.get(raw)
        if canonical is not None:
            names[i] = canonical
            continue

        for main in main_sections:
            if main in raw:
                names[i] = main
                break
        else:
            # Group similar view-description phrasings under 'findings'.
            if p_findings.search(raw) is not None:
                names[i] = 'findings'

    return names
212
+
213
+
214
def custom_mimic_cxr_rules():
    """Return hand-curated overrides for reports the sectioner mishandles.

    Source: https://github.com/MIT-LCP/mimic-cxr/blob/master/LICENSE

    Returns:
        tuple of two dicts keyed by study id (e.g. 's50913680'):
            custom_section_names - study id -> name of the section that
                actually holds the report's conclusion text.
            custom_indices - study id -> [start, end] character range of the
                conclusion text; [0, 0] marks reports with no findings at all.
    """
    custom_section_names = {
        's50913680': 'recommendations',  # files/p11/p11851243/s50913680.txt
        's59363654': 'examination',  # files/p12/p12128253/s59363654.txt
        's59279892': 'technique',  # files/p13/p13150370/s59279892.txt
        's59768032': 'recommendations',  # files/p13/p13249077/s59768032.txt
        's57936451': 'indication',  # files/p14/p14325424/s57936451.txt
        's50058765': 'indication',  # files/p14/p14731346/s50058765.txt
        's53356173': 'examination',  # files/p15/p15898350/s53356173.txt
        's53202765': 'technique',  # files/p16/p16076182/s53202765.txt
        's50808053': 'technique',  # files/p16/p16631485/s50808053.txt
        's51966317': 'indication',  # files/p10/p10817099/s51966317.txt
        's50743547': 'examination',  # files/p11/p11388341/s50743547.txt
        's56451190': 'note',  # files/p11/p11842879/s56451190.txt
        's59067458': 'recommendations',  # files/p11/p11984647/s59067458.txt
        's59215320': 'examination',  # files/p12/p12408912/s59215320.txt
        's55124749': 'indication',  # files/p12/p12428492/s55124749.txt
        's54365831': 'indication',  # files/p13/p13876470/s54365831.txt
        's59087630': 'recommendations',  # files/p14/p14267880/s59087630.txt
        's58157373': 'recommendations',  # files/p15/p15032392/s58157373.txt
        's56482935': 'recommendations',  # files/p15/p15388421/s56482935.txt
        's58375018': 'recommendations',  # files/p15/p15505556/s58375018.txt
        's54654948': 'indication',  # files/p17/p17090359/s54654948.txt
        's55157853': 'examination',  # files/p18/p18975498/s55157853.txt
        's51491012': 'history',  # files/p19/p19314266/s51491012.txt

    }

    custom_indices = {
        's50525523': [201, 349],  # files/p10/p10602608/s50525523.txt
        's57564132': [233, 554],  # files/p10/p10637168/s57564132.txt
        's59982525': [313, 717],  # files/p11/p11989982/s59982525.txt
        's53488209': [149, 475],  # files/p12/p12458657/s53488209.txt
        's54875119': [234, 988],  # files/p13/p13687044/s54875119.txt
        's50196495': [59, 399],  # files/p13/p13894879/s50196495.txt
        's56579911': [59, 218],  # files/p15/p15394326/s56579911.txt
        's52648681': [292, 631],  # files/p15/p15666238/s52648681.txt
        's59889364': [172, 453],  # files/p15/p15835529/s59889364.txt
        's53514462': [73, 377],  # files/p16/p16297706/s53514462.txt
        's59505494': [59, 450],  # files/p16/p16730991/s59505494.txt
        's53182247': [59, 412],  # files/p16/p16770442/s53182247.txt
        's51410602': [47, 320],  # files/p17/p17069955/s51410602.txt
        's56412866': [522, 822],  # files/p17/p17612000/s56412866.txt
        's54986978': [59, 306],  # files/p17/p17912487/s54986978.txt
        's59003148': [262, 505],  # files/p17/p17916384/s59003148.txt
        's57150433': [61, 394],  # files/p18/p18335791/s57150433.txt
        's56760320': [219, 457],  # files/p18/p18418794/s56760320.txt
        's59562049': [158, 348],  # files/p18/p18502016/s59562049.txt
        's52674888': [145, 296],  # files/p19/p19381919/s52674888.txt
        's55258338': [192, 568],  # files/p13/p13719117/s55258338.txt
        's59330497': [140, 655],  # files/p15/p15479218/s59330497.txt
        's52119491': [179, 454],  # files/p17/p17959278/s52119491.txt
        # below have no findings at all in the entire report
        's58235663': [0, 0],  # files/p11/p11573679/s58235663.txt
        's50798377': [0, 0],  # files/p12/p12632853/s50798377.txt
        's54168089': [0, 0],  # files/p14/p14463099/s54168089.txt
        's53071062': [0, 0],  # files/p15/p15774521/s53071062.txt
        's56724958': [0, 0],  # files/p16/p16175671/s56724958.txt
        's54231141': [0, 0],  # files/p16/p16312859/s54231141.txt
        's53607029': [0, 0],  # files/p17/p17603668/s53607029.txt
        's52035334': [0, 0],  # files/p19/p19349312/s52035334.txt
    }

    return custom_section_names, custom_indices
281
+