workwhileweb committed on
Commit
19cd81c
·
verified ·
1 Parent(s): 7ac5f0e

Update core/extractor.py

Browse files
Files changed (1) hide show
  1. core/extractor.py +19 -0
core/extractor.py CHANGED
@@ -147,6 +147,7 @@ class Extractor:
147
  result['ID_number'] = _idnumber
148
  result['Name'] = ''
149
  result['Date_of_birth'] = ''
 
150
  result['Gender'] = ''
151
  result['Nationality'] = ''
152
  result['Place_of_origin'] = ''
@@ -176,6 +177,24 @@ class Extractor:
176
  result['Date_of_birth_box'] = DOB[1] if DOB else []
177
  continue
178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  if re.search(r'sinh|birth|bith', s) and (not result['Date_of_birth']):
180
  if re.search(regex_dob, s):
181
  DOB = _results[i]
 
147
  result['ID_number'] = _idnumber
148
  result['Name'] = ''
149
  result['Date_of_birth'] = ''
150
+ result['Date_of_issue'] = ''
151
  result['Gender'] = ''
152
  result['Nationality'] = ''
153
  result['Place_of_origin'] = ''
 
177
  result['Date_of_birth_box'] = DOB[1] if DOB else []
178
  continue
179
 
180
+ if re.search(r'month', s) and (not result['Date_of_issue']):
181
+ if re.search(regex_dob, s):
182
+ DOI = _results[i]
183
+
184
+ elif re.search(regex_dob, _results[i - 1][0]):
185
+ DOI = _results[i - 1]
186
+
187
+ elif re.search(regex_dob, _results[i + 1][0]):
188
+ DOI = _results[i + 1]
189
+
190
+ else:
191
+ DOI = []
192
+
193
+ result['Date_of_issue'] = (re.split(r':|\s+', DOI[0]))[-1].strip() if DOI else ''
194
+ result['Date_of_issue_box'] = DOI[1] if DOI else []
195
+
196
+ continue
197
+
198
  if re.search(r'sinh|birth|bith', s) and (not result['Date_of_birth']):
199
  if re.search(regex_dob, s):
200
  DOB = _results[i]