Sage committed on
Commit
3dd785b
1 Parent(s): fd573a1

Big Commit

Browse files
Files changed (11) hide show
  1. RPFAA Building P1.json +0 -1
  2. TDRP.json +53 -0
  3. ai_functions.py +106 -0
  4. app.log +0 -678
  5. app.py +33 -213
  6. gr.py +0 -11
  7. helpers.py +106 -0
  8. ocr_functions.py +69 -0
  9. output.json +0 -146
  10. requirements.txt +0 -0
  11. settings.py +58 -16
RPFAA Building P1.json DELETED
@@ -1 +0,0 @@
1
- [{"File Name": "DARAGA-ALCALA-0017", "General Information": {"ARP No.": "2017-04-0000-00009", "Owner": "Rudy Madrona", "Address": "", "Tel No.": "", "Administrator/Beneficial User": "", "Address:": "", "Tel No.:": "", "PIN": "0310 400 301008-1001", "TIN_1": "", "TIN_2": ""}, "Building Location": {"No. / Street": "", "Brgy/District": "", "Municipality": "", "Province/city": ""}, "Land Reference": {"Owner": "", "OCT/TCT/CLOA NO.": "", "Lot No.": "", "Survey No.": "", "Blk No.": "", "TD/ARP No.:": "", "Area": ""}, "Property Appraisal": {"Kind of Bldg": "", "Structural Type": "", "Bldg. Permit No.": "", "Date Issued": "", "Condominium Certificate of Title(CCT)": "", "Certificate of Completion Issued on": "", "Certificate of Occupancy Issued on": "", "Date Constructed/Completed": "", "Date Occupied": "", "Bldg. Age": "", "No. of Storeys": "", "Area of 1st Flr": "", "Area of 2nd Flr": "", "Area of 3rd Flr": "", "Area of 4th Flr": "", "Total Floor Area": ""}}, {"File Name": "DARAGA-ALCALA-0033", "General Information": {"ARP No.": "2011-64-0003-00017", "Owner": "LLANTOS, JULIAN", "Address": "", "Tel No.": "", "Administrator/Beneficial User": "", "Address:": "", "Tel No.:": "", "PIN": "0310 400301012-100", "TIN_1": "", "TIN_2": ""}, "Building Location": {"No. / Street": "", "Brgy/District": "", "Municipality": "", "Province/city": ""}, "Land Reference": {"Owner": "", "OCT/TCT/CLOA NO.": "", "Lot No.": "", "Survey No.": "", "Blk No.": "", "TD/ARP No.:": "", "Area": ""}, "Property Appraisal": {"Kind of Bldg": "", "Structural Type": "", "Bldg. Permit No.": "", "Date Issued": "", "Condominium Certificate of Title(CCT)": "", "Certificate of Completion Issued on": "", "Certificate of Occupancy Issued on": "", "Date Constructed/Completed": "", "Date Occupied": "", "Bldg. Age": "", "No. 
of Storeys": "", "Area of 1st Flr": "", "Area of 2nd Flr": "", "Area of 3rd Flr": "", "Area of 4th Flr": "", "Total Floor Area": ""}}, {"File Name": "DARAGA-ALCALA-0071", "General Information": {"ARP No.": "2017-04-0003-000361", "Owner": "ROLANDO LISTANA", "Address": "", "Tel No.": "", "Administrator/Beneficial User": "", "Address:": "", "Tel No.:": "", "PIN": "0310400301024100", "TIN_1": "", "TIN_2": ""}, "Building Location": {"No. / Street": "", "Brgy/District": "", "Municipality": "", "Province/city": ""}, "Land Reference": {"Owner": "", "OCT/TCT/CLOA NO.": "", "Lot No.": "", "Survey No.": "", "Blk No.": "", "TD/ARP No.:": "", "Area": ""}, "Property Appraisal": {"Kind of Bldg": "", "Structural Type": "", "Bldg. Permit No.": "", "Date Issued": "", "Condominium Certificate of Title(CCT)": "", "Certificate of Completion Issued on": "", "Certificate of Occupancy Issued on": "", "Date Constructed/Completed": "", "Date Occupied": "", "Bldg. Age": "", "No. of Storeys": "", "Area of 1st Flr": "", "Area of 2nd Flr": "", "Area of 3rd Flr": "", "Area of 4th Flr": "", "Total Floor Area": ""}}]
 
 
TDRP.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"File Name": "%s",
2
+ "General Information": {
3
+ "TD No.": "",
4
+ "Property Identification No.": "",
5
+ "Owner": "",
6
+ "TIN_1": "",
7
+ "Address_1": "",
8
+ "Telephone No._1": "",
9
+ "Administrator/Beneficial User": "",
10
+ "TIN_2": "",
11
+ "Address_2": "",
12
+ "Telephone No._2": ""
13
+ },
14
+ "Location of Property": {
15
+ "Number and Street": "",
16
+ "Barangay/District": "",
17
+ "Municipality & Province/City": ""
18
+ },
19
+ "Land Reference": {
20
+ "OCT/TCT/CLOA No.": "",
21
+ "Survey No.": "",
22
+ "CCT": "",
23
+ "Lot No.": "",
24
+ "Dated": "",
25
+ "Blk No.": ""
26
+ },
27
+ "Boundaries": {
28
+ "North": "",
29
+ "South": "",
30
+ "East": "",
31
+ "West": ""
32
+ },
33
+ "Kind of Property Assessed": {
34
+ "Land": "",
35
+ "Building": "",
36
+ "No. of Storeys": "",
37
+ "Brief Description_1": "",
38
+ "Machinery": "",
39
+ "Brief Description_2": "",
40
+ "Others": "",
41
+ "Specify": ""
42
+ },
43
+ "Property Assesment": {
44
+ "Total Assessed Value": "",
45
+ "Taxable": "",
46
+ "QTR": "",
47
+ "Year": "",
48
+ "This declaration cancels TD No.": "",
49
+ "Owner": "",
50
+ "Previous A.V. Php": "",
51
+ "Memoranda": ""
52
+ }
53
+ }
ai_functions.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from settings import gpt_api_key, gpt_model, RPFAAP2, RPFAAP1, TDRP, TDRP_COORDS
2
+ import openai
3
+ import json
4
+ import logging
5
+ from helpers import remove_na, filter_tables, merge_strings
6
+ import os
7
+ logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
8
+
9
def chat_gpt_image(content, context):
    """Ask the chat model to return a corrected version of OCR-scanned text.

    Args:
        content: The OCR text to correct (a string; it is joined into the
            user message).
        context: Extra instruction text for the model. Pass ``""`` to send
            only the base instruction and the text.

    Returns:
        The message content of the first completion choice.

    Raises:
        KeyError: If the ``GPT_API_KEY`` environment variable is not set.
    """
    openai.api_key = os.environ['GPT_API_KEY']
    prompt = "You are an expert at identifying OCR errors and correcting them with the help of context, intuition and logic."
    document = "The following text was scanned using OCR, your goal is to return a corrected version of the text"
    prefix = "Additionally"

    # Always lead with the base instruction. Previously it was dropped
    # whenever a context was supplied, leaving a user message that started
    # with "Additionally ..." and never stated the actual task.
    parts = [document]
    if context != "":
        parts.append(" ".join((prefix, context)))
    parts.append(content)

    final_content = " ".join(parts)
    logging.info(final_content)
    completion = openai.ChatCompletion.create(
        model=gpt_model,
        user="1",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": final_content},
        ],
    )
    corrected = completion.choices[0].message.content
    logging.info(corrected)
    return corrected
33
+
34
def chat_gpt_document(content, document_type, context):
    """Extract the fields of a scanned document into a JSON string.

    The OCR text is sent to the chat model together with the desired output
    format for the selected document type. The reply is expected to consist
    of ``label: value`` lines, which are copied into the JSON template file
    for that document type. Table data is then merged in at the top level.

    Args:
        content: Indexable with at least four items:
            ``content[0]`` is the text sent to the model,
            ``content[1]`` is passed to ``merge_strings``,
            ``content[2]`` is passed to ``filter_tables``,
            ``content[3]`` is stored under ``"File Name"``.
        document_type: One of ``"RPFAA Building P1"``,
            ``"RPFAA Building P2"`` or ``"TDRP"``.
        context: Extra instruction text for the model, or ``""`` for none.

    Returns:
        A JSON string (indent 4) of the populated template. For an unknown
        ``document_type`` the string encodes a one-item list holding the
        message ``"Please Select a Document Type"``.

    Raises:
        KeyError: If the ``GPT_API_KEY`` environment variable is not set.
    """
    prompt = "You are an expert at identifying OCR errors and correcting them with the help of context, intuition and logic."
    # NOTE(review): "fields that don't have the , simply" reads as if a word
    # is missing; the string is left unchanged pending confirmation.
    document_prefix = "The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels and do not populate fields that don't have the , simply extract the text, correct it and return only the desired format. Leave the field blank if it cannot be found in the text. Text:"
    additional_prefix = "Additionally the text"

    # Resolve the document type first so that an invalid selection returns
    # immediately, without requiring credentials or touching the API.
    # NOTE(review): every type uses TDRP_COORDS — confirm this is intended
    # for the RPFAA documents as well.
    if document_type == "RPFAA Building P1":
        document = "RPFAAP1.json"
        desired_format = RPFAAP1
        tables = [3]
        input_coords = TDRP_COORDS
    elif document_type == "RPFAA Building P2":
        document = "RPFAAP2.json"
        desired_format = RPFAAP2
        tables = []
        input_coords = TDRP_COORDS
    elif document_type == "TDRP":
        document = "TDRP.json"
        desired_format = TDRP
        tables = [0]
        input_coords = TDRP_COORDS
    else:
        return json.dumps(["Please Select a Document Type"], indent=4)

    openai.api_key = os.environ['GPT_API_KEY']

    pair_content = content[0]
    document_content = content[1]
    table_content = content[2]
    content_name = content[3]

    if context == "":
        sequence_1 = (document_prefix, pair_content, desired_format)
    else:
        sequence_1 = (document_prefix, pair_content, desired_format, additional_prefix, context)

    content_1 = " ".join(sequence_1)
    logging.info(content_1)

    completion_1 = openai.ChatCompletion.create(
        model=gpt_model,
        user="1",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": content_1},
        ],
    )
    reply = completion_1.choices[0].message.content
    logging.info(reply)
    input_string = remove_na(reply)
    input_string = merge_strings(input_string, input_coords, document_content)

    with open(document, encoding="utf-8") as f:
        property_info = json.load(f)

    # Adds the name of the file
    property_info["File Name"] = content_name

    # Fills in the information
    for line in input_string.split('\n'):
        if ':' not in line:
            continue
        key, value = line.split(':', 1)
        key = key.strip()
        value = value.strip()
        for section in property_info.values():
            # Only nested dicts are field groups. "File Name" maps to a
            # plain string, where `in` would be a substring test and the
            # assignment below would raise TypeError.
            if isinstance(section, dict) and key in section:
                section[key] = value
                break
        else:
            # No section owns this label; only the top-level file name may
            # be overridden by the model output.
            if key == "File Name":
                property_info[key] = value

    # Merge the selected tables into the top level of the result.
    table_string = filter_tables(table_content, tables)
    property_info.update(json.loads(table_string))
    return json.dumps(property_info, indent=4)
app.log CHANGED
@@ -1,678 +0,0 @@
1
- 2023-05-11 19:13:33,773 - INFO - The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels of the desired format, simply extract the text, correct it and return only the desired format. Text: ARP No. 2017-04-0000-00009
2
- OWNER: RUDY MADRONA
3
- Address: ALCALA DARAGA, ALBAY
4
- Tel No.:
5
- Administrator/Beneficial User:
6
- Address:
7
- Tel No.:
8
- BUILDING LOCATION
9
- No. / Street
10
- Brgy/District
11
- ALCALA
12
- DA RAGA
13
- Municipality:
14
- Province/City
15
- ALBAY
16
- PROPERTY APPRAISAL
17
- Kind of Bldg.
18
- Structural Type V
19
- Bldg. Permit No.
20
- Date Issued
21
- Condominium Certificate of Title(CCT)
22
- Certificate of Completion Issued On:
23
- Certificate of Occupancy Issued On:
24
- Date Constructed/Completed:
25
- Date Occupied:
26
- REAL PROPERTY FIELD APPRAISAL & ASSESSMENT SHEET - BUILDING & OTHER
27
- STRUCTURES
28
- PIN 0310 400 301008 -1001
29
- TIN
30
- Tiles
31
- STRUCTURAL MATERIALS (Checklist)
32
- ROOF
33
- Reinforced Concrete
34
- G.I. Sheet
35
- Aluminum
36
- Asbestos
37
- Long Span
38
- Concrete Desk
39
- Nipa/Anahaw/Gogon
40
- Others (Specify)
41
- FLOORING
42
- Reinforced
43
- Concrete
44
- (for upper
45
- floor)
46
- Plain Cement
47
- TIN
48
- Floor Plan:
49
- Attach the building plan sketch of floor plan. A photograph may also be attached if necessary.
50
- Marble
51
- Wood
52
- Tiles
53
- Others
54
- (specify)
55
- LAND REFERENCE
56
- Owner
57
- OCT/TCT/CLOA No.
58
- Lot No.
59
- MADRONA, DEMETRIO
60
- TD/ARP No:
61
- Area
62
- Bldg. Age
63
- No. of Storeys
64
- Area of 1st flr:
65
- Area of 2nd flr:
66
- Area of 3rd flr:
67
- Area of 4th flr:
68
- TRANSACTION CODE
69
- Total Floor Area: 6 SQ.M.
70
- 1st 2nd 3rd 4th
71
- Flr. Flr. Flr. Flr.
72
- Plain
73
- Cement
74
- Wood
75
- Walls & 1st 2nd 3rd 4th
76
- Partitions Flr. Flr. Flr. Flr.
77
- Reinforced
78
- Concrete
79
- CHB
80
- G.I Sheet
81
- Survey No. 300
82
- Blk No.
83
- Build-a-
84
- wall
85
- Sawali
86
- Bamboo
87
- Others
88
- (Specify)
89
-
90
- Desired Format:
91
- ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
92
- Owner: <Person's Name>
93
- Address: -||-
94
- Tel No.: -||-
95
- Administrator/Beneficial User: -||-
96
- Address: -||-
97
- Tel No.: -||-
98
- PIN: <Numerical Value Only, Replace Slashes with the number 1>
99
- TIN_1: <Numerical Value Only, Replace Slashes with the number 1>
100
- TIN_2: <Numerical Value Only, Replace Slashes with the number 1>
101
-
102
- 2023-05-11 19:13:37,266 - INFO - ARP No.: 2017-04-0000-00009
103
- Owner: RUDY MADRONA
104
- Address: ALCALA DARAGA, ALBAY
105
- Tel No.:
106
- Administrator/Beneficial User: -||-
107
- Address: -||-
108
- Tel No.: -||-
109
- PIN: 0310 400 301008-1001
110
- TIN_1: -||-
111
- TIN_2: -||-
112
- 2023-05-11 19:13:38,952 - INFO - The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels of the desired format, simply extract the text, correct it and return only the desired format. Text: ARP Np. 2011-64-0003-00017
113
- OWNER: LLANTOS, JULIAN
114
- Address: ALCALA, DARAGA, AMBAY
115
- Tel No.:
116
- Administrator/Beneficial User:
117
- Address:
118
- Tel No.:
119
- BUILDING LOCATION
120
- No. / Street
121
- Brgy/District
122
- ALCALA
123
- DARAGA
124
- ALBAY
125
- Municipality:
126
- Province/City
127
- PROPERTY APPRAISAL
128
- Kind of Bldg.
129
- Structural Type 111-C
130
- Bldg. Permit No.
131
- Date Issued
132
- Condominium Certificate of Title(CCT)
133
- Certificate of Completion Issued On:
134
- Certificate of Occupancy Issued On:
135
- REAL PROPERTY FIELD APPRAISAL & ASSESSMENT SHEET - BUILDING & OTHER
136
- STRUCTURES
137
- PIN 0310 400301012-100)
138
- TIN
139
- Date Constructed/Completed:
140
- Date Occupied:
141
- STRUCTURAL MATERIALS (Checklist)
142
- Tiles
143
- ROOF
144
- Reinforced Concrete
145
- G.I. Sheet
146
- Aluminum
147
- Asbestos
148
- Long Span
149
- Concrete Desk
150
- Nipa/Anahaw/Gogon
151
- Others (Specify)
152
- Floor Plan:
153
- Attach the building plan sketch of floor plan. A photograph may also be attached if necessary.
154
- FLOORING
155
- Reinforced
156
- Concrete
157
- (for upper
158
- floor)
159
- Plain Cement
160
- Marble
161
- Wood
162
- Tiles
163
- TIN
164
- Others
165
- (specify)
166
- LAND REFERENCE
167
- Owner
168
- OCT/TCT/CLOA No.
169
- Lot No. 3095-P
170
- TD/ARP No:
171
- Area
172
- Bldg. Age
173
- No. of Storeys
174
- Area of 1st flr:
175
- Area of 2nd flr:
176
- Area of 3rd flr:
177
- Area of 4th flr:
178
- Total Floor Area: 49 SQ.M.
179
- 1st 2nd 3rd 4th
180
- Flr. Flr. Flr. Flr.
181
- TRANSACTION CODE
182
- Walls &
183
- Partitions
184
- Reinforced
185
- Concrete
186
- Plain
187
- Cement
188
- Wood
189
- CHB
190
- G.I Sheet
191
- Build-a-
192
- wall
193
- Sawali
194
- Bamboo
195
- Others
196
- (Specify)
197
- Survey No.
198
- Bik No.
199
- 1st 2nd 3rd 4th
200
- Flr. Fir. Flr. Flr.
201
-
202
- Desired Format:
203
- ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
204
- Owner: <Person's Name>
205
- Address: -||-
206
- Tel No.: -||-
207
- Administrator/Beneficial User: -||-
208
- Address: -||-
209
- Tel No.: -||-
210
- PIN: <Numerical Value Only, Replace Slashes with the number 1>
211
- TIN_1: <Numerical Value Only, Replace Slashes with the number 1>
212
- TIN_2: <Numerical Value Only, Replace Slashes with the number 1>
213
-
214
- 2023-05-11 19:13:42,282 - INFO - ARP No.: 2011-64-0003-00017
215
- Owner: JULIAN LLANTOS
216
- Address: ALCALA, DARAGA, ALBAY
217
- Tel No.: -||-
218
- Administrator/Beneficial User: -||-
219
- Address: -||-
220
- Tel No.: -||-
221
- PIN: 0310400301012100
222
- TIN_1: -||-
223
- TIN_2: -||-
224
- 2023-05-11 19:13:44,006 - INFO - The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels of the desired format, simply extract the text, correct it and return only the desired format. Text: ARP No. 2017-04-0003-00036
225
- OWNER: LISTANA, ROLANDO
226
- Address: ALCAL� DARAGA, ALBAY
227
- Tel No.:
228
- Administrator/Beneficial User:
229
- Address:
230
- Tel No.:
231
- BUILDING LOCATION
232
- No. / Street
233
- Brgy/District
234
- Municipality:
235
- ALCALA
236
- Province/City
237
- REAL PROPERTY FIELD APPRAISAL & ASSESSMENT SHEET - BUILDING & OTHER
238
- STRUCTURES
239
- DARALA
240
- ALBAY
241
- PROPERTY APPRAISAL
242
- Kind of Bldg.
243
- Structural Type V
244
- Bldg. Permit No.
245
- Date Issued
246
- Condominium Certificate of Title(CCT)
247
- Certificate of Completion Issued On:
248
- Certificate of Occupancy Issued On:
249
- Date Constructed/Completed: 1980
250
- Date Occupied:
251
- Tiles
252
- STRUCTURAL MATERIALS (Checklist)
253
- ROOF
254
- Reinforced Concrete
255
- G.I. Sheet
256
- Aluminum
257
- Asbestos
258
- Long Span
259
- Concrete Desk
260
- Nipa/Anahaw/Gogon
261
- Others (Specify)
262
- FLOORING
263
- Reinforced
264
- Concrete
265
- (for upper
266
- floor)
267
- Plain Cement
268
- Marble
269
- Wood
270
- Tiles
271
- Others
272
- (specify)
273
- PIN 0310400 301 024 -100
274
- TIN
275
- TIN
276
- LAND REFERENCE
277
- Owner
278
- LISTANA
279
- OCT/TCT/CLOA No.
280
- Lot No.
281
- Floor Plan:
282
- Attach the building plan sketch of floor plan. A photograph may also be attached if necessary.
283
- TD/ARP No:
284
- Area
285
- I
286
- 2798
287
- Bldg. Age
288
- No. of Storeys
289
- Area of 1st flr:
290
- Area of 2nd flr:
291
- Area of 3rd flr:
292
- Area of 4th flr:
293
- TRANSACTION CODE
294
- Total Floor Area: 125Q-m�
295
- 1st 2nd 3rd 4th
296
- Flr. Flr. Flr. Flr.
297
- MARIAND
298
- *
299
- Walls & 1st 2nd 3rd 4th
300
- Partitions Flr. Flr. Flr. Flr.
301
- Reinforced
302
- Concrete
303
- Plain
304
- Cement
305
- Wood
306
- CHB
307
- G.1 Sheet
308
- Survey No. 4684
309
- Blk No.
310
- Build-a-
311
- wall
312
- Sawali
313
- Bamboo
314
- Others
315
- (Specify)
316
-
317
- Desired Format:
318
- ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
319
- Owner: <Person's Name>
320
- Address: -||-
321
- Tel No.: -||-
322
- Administrator/Beneficial User: -||-
323
- Address: -||-
324
- Tel No.: -||-
325
- PIN: <Numerical Value Only, Replace Slashes with the number 1>
326
- TIN_1: <Numerical Value Only, Replace Slashes with the number 1>
327
- TIN_2: <Numerical Value Only, Replace Slashes with the number 1>
328
-
329
- 2023-05-11 19:13:48,089 - INFO - ARP No.: 2017-04-0003-00036
330
- Owner: ROLANDO LISTANA
331
- Address: ALCALA DARAGA, ALBAY
332
- Tel No.: -||-
333
- Administrator/Beneficial User: -||-
334
- Address: -||-
335
- Tel No.: -||-
336
- PIN: 0310400 301 024-100
337
- TIN_1: -||- (no value provided)
338
- TIN_2: -||- (no value provided)
339
- 2023-05-11 19:13:48,089 - INFO - [{'File Name': 'DARAGA-ALCALA-0017', 'General Information': {'ARP No.': '2017-04-0000-00009', 'Owner': 'RUDY MADRONA', 'Address': '', 'Tel No.': '', 'Administrator/Beneficial User': '', 'Address:': '', 'Tel No.:': '', 'PIN': '0310 400 301008-1001', 'TIN_1': '', 'TIN_2': ''}, 'Building Location': {'No. / Street': '', 'Brgy/District': '', 'Municipality': '', 'Province/city': ''}, 'Land Reference': {'Owner': '', 'OCT/TCT/CLOA NO.': '', 'Lot No.': '', 'Survey No.': '', 'Blk No.': '', 'TD/ARP No.:': '', 'Area': ''}, 'Property Appraisal': {'Kind of Bldg': '', 'Structural Type': '', 'Bldg. Permit No.': '', 'Date Issued': '', 'Condominium Certificate of Title(CCT)': '', 'Certificate of Completion Issued on': '', 'Certificate of Occupancy Issued on': '', 'Date Constructed/Completed': '', 'Date Occupied': '', 'Bldg. Age': '', 'No. of Storeys': '', 'Area of 1st Flr': '', 'Area of 2nd Flr': '', 'Area of 3rd Flr': '', 'Area of 4th Flr': '', 'Total Floor Area': ''}}, {'File Name': 'DARAGA-ALCALA-0033', 'General Information': {'ARP No.': '2011-64-0003-00017', 'Owner': 'JULIAN LLANTOS', 'Address': '', 'Tel No.': '', 'Administrator/Beneficial User': '', 'Address:': '', 'Tel No.:': '', 'PIN': '0310400301012100', 'TIN_1': '', 'TIN_2': ''}, 'Building Location': {'No. / Street': '', 'Brgy/District': '', 'Municipality': '', 'Province/city': ''}, 'Land Reference': {'Owner': '', 'OCT/TCT/CLOA NO.': '', 'Lot No.': '', 'Survey No.': '', 'Blk No.': '', 'TD/ARP No.:': '', 'Area': ''}, 'Property Appraisal': {'Kind of Bldg': '', 'Structural Type': '', 'Bldg. Permit No.': '', 'Date Issued': '', 'Condominium Certificate of Title(CCT)': '', 'Certificate of Completion Issued on': '', 'Certificate of Occupancy Issued on': '', 'Date Constructed/Completed': '', 'Date Occupied': '', 'Bldg. Age': '', 'No. 
of Storeys': '', 'Area of 1st Flr': '', 'Area of 2nd Flr': '', 'Area of 3rd Flr': '', 'Area of 4th Flr': '', 'Total Floor Area': ''}}, {'File Name': 'DARAGA-ALCALA-0071', 'General Information': {'ARP No.': '2017-04-0003-00036', 'Owner': 'ROLANDO LISTANA', 'Address': '', 'Tel No.': '', 'Administrator/Beneficial User': '', 'Address:': '', 'Tel No.:': '', 'PIN': '0310400 301 024-100', 'TIN_1': '(no value provided)', 'TIN_2': '(no value provided)'}, 'Building Location': {'No. / Street': '', 'Brgy/District': '', 'Municipality': '', 'Province/city': ''}, 'Land Reference': {'Owner': '', 'OCT/TCT/CLOA NO.': '', 'Lot No.': '', 'Survey No.': '', 'Blk No.': '', 'TD/ARP No.:': '', 'Area': ''}, 'Property Appraisal': {'Kind of Bldg': '', 'Structural Type': '', 'Bldg. Permit No.': '', 'Date Issued': '', 'Condominium Certificate of Title(CCT)': '', 'Certificate of Completion Issued on': '', 'Certificate of Occupancy Issued on': '', 'Date Constructed/Completed': '', 'Date Occupied': '', 'Bldg. Age': '', 'No. of Storeys': '', 'Area of 1st Flr': '', 'Area of 2nd Flr': '', 'Area of 3rd Flr': '', 'Area of 4th Flr': '', 'Total Floor Area': ''}}]
340
- 2023-05-11 20:18:32,352 - INFO - The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels of the desired format, simply extract the text, correct it and return only the desired format. Text: ARP No. 2017-04-0000-00009
341
- OWNER: RUDY MADRONA
342
- Address: ALCALA DARAGA, ALBAY
343
- Tel No.:
344
- Administrator/Beneficial User:
345
- Address:
346
- Tel No.:
347
- BUILDING LOCATION
348
- No. / Street
349
- Brgy/District
350
- ALCALA
351
- DA RAGA
352
- Municipality:
353
- Province/City
354
- ALBAY
355
- PROPERTY APPRAISAL
356
- Kind of Bldg.
357
- Structural Type V
358
- Bldg. Permit No.
359
- Date Issued
360
- Condominium Certificate of Title(CCT)
361
- Certificate of Completion Issued On:
362
- Certificate of Occupancy Issued On:
363
- Date Constructed/Completed:
364
- Date Occupied:
365
- REAL PROPERTY FIELD APPRAISAL & ASSESSMENT SHEET - BUILDING & OTHER
366
- STRUCTURES
367
- PIN 0310 400 301008 -1001
368
- TIN
369
- Tiles
370
- STRUCTURAL MATERIALS (Checklist)
371
- ROOF
372
- Reinforced Concrete
373
- G.I. Sheet
374
- Aluminum
375
- Asbestos
376
- Long Span
377
- Concrete Desk
378
- Nipa/Anahaw/Gogon
379
- Others (Specify)
380
- FLOORING
381
- Reinforced
382
- Concrete
383
- (for upper
384
- floor)
385
- Plain Cement
386
- TIN
387
- Floor Plan:
388
- Attach the building plan sketch of floor plan. A photograph may also be attached if necessary.
389
- Marble
390
- Wood
391
- Tiles
392
- Others
393
- (specify)
394
- LAND REFERENCE
395
- Owner
396
- OCT/TCT/CLOA No.
397
- Lot No.
398
- MADRONA, DEMETRIO
399
- TD/ARP No:
400
- Area
401
- Bldg. Age
402
- No. of Storeys
403
- Area of 1st flr:
404
- Area of 2nd flr:
405
- Area of 3rd flr:
406
- Area of 4th flr:
407
- TRANSACTION CODE
408
- Total Floor Area: 6 SQ.M.
409
- 1st 2nd 3rd 4th
410
- Flr. Flr. Flr. Flr.
411
- Plain
412
- Cement
413
- Wood
414
- Walls & 1st 2nd 3rd 4th
415
- Partitions Flr. Flr. Flr. Flr.
416
- Reinforced
417
- Concrete
418
- CHB
419
- G.I Sheet
420
- Survey No. 300
421
- Blk No.
422
- Build-a-
423
- wall
424
- Sawali
425
- Bamboo
426
- Others
427
- (Specify)
428
-
429
- Desired Format:
430
- ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
431
- Owner: <Person's Name>
432
- Address: -||-
433
- Tel No.: -||-
434
- Administrator/Beneficial User: -||-
435
- Address: -||-
436
- Tel No.: -||-
437
- PIN: <Numerical Value Only, Replace Slashes with the number 1>
438
- TIN_1: <Numerical Value Only, Replace Slashes with the number 1>
439
- TIN_2: <Numerical Value Only, Replace Slashes with the number 1>
440
-
441
- 2023-05-11 20:18:35,277 - INFO - ARP No.: 2017-04-0000-00009
442
- Owner: Rudy Madrona
443
- Address: Alcala Daraga, Albay
444
- Tel No.:
445
- Administrator/Beneficial User:
446
- Address:
447
- Tel No.:
448
- PIN: 0310 400 301008-1001
449
- TIN_1:
450
- TIN_2:
451
- 2023-05-11 20:18:37,028 - INFO - The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels of the desired format, simply extract the text, correct it and return only the desired format. Text: ARP Np. 2011-64-0003-00017
452
- OWNER: LLANTOS, JULIAN
453
- Address: ALCALA, DARAGA, AMBAY
454
- Tel No.:
455
- Administrator/Beneficial User:
456
- Address:
457
- Tel No.:
458
- BUILDING LOCATION
459
- No. / Street
460
- Brgy/District
461
- ALCALA
462
- DARAGA
463
- ALBAY
464
- Municipality:
465
- Province/City
466
- PROPERTY APPRAISAL
467
- Kind of Bldg.
468
- Structural Type 111-C
469
- Bldg. Permit No.
470
- Date Issued
471
- Condominium Certificate of Title(CCT)
472
- Certificate of Completion Issued On:
473
- Certificate of Occupancy Issued On:
474
- REAL PROPERTY FIELD APPRAISAL & ASSESSMENT SHEET - BUILDING & OTHER
475
- STRUCTURES
476
- PIN 0310 400301012-100)
477
- TIN
478
- Date Constructed/Completed:
479
- Date Occupied:
480
- STRUCTURAL MATERIALS (Checklist)
481
- Tiles
482
- ROOF
483
- Reinforced Concrete
484
- G.I. Sheet
485
- Aluminum
486
- Asbestos
487
- Long Span
488
- Concrete Desk
489
- Nipa/Anahaw/Gogon
490
- Others (Specify)
491
- Floor Plan:
492
- Attach the building plan sketch of floor plan. A photograph may also be attached if necessary.
493
- FLOORING
494
- Reinforced
495
- Concrete
496
- (for upper
497
- floor)
498
- Plain Cement
499
- Marble
500
- Wood
501
- Tiles
502
- TIN
503
- Others
504
- (specify)
505
- LAND REFERENCE
506
- Owner
507
- OCT/TCT/CLOA No.
508
- Lot No. 3095-P
509
- TD/ARP No:
510
- Area
511
- Bldg. Age
512
- No. of Storeys
513
- Area of 1st flr:
514
- Area of 2nd flr:
515
- Area of 3rd flr:
516
- Area of 4th flr:
517
- Total Floor Area: 49 SQ.M.
518
- 1st 2nd 3rd 4th
519
- Flr. Flr. Flr. Flr.
520
- TRANSACTION CODE
521
- Walls &
522
- Partitions
523
- Reinforced
524
- Concrete
525
- Plain
526
- Cement
527
- Wood
528
- CHB
529
- G.I Sheet
530
- Build-a-
531
- wall
532
- Sawali
533
- Bamboo
534
- Others
535
- (Specify)
536
- Survey No.
537
- Bik No.
538
- 1st 2nd 3rd 4th
539
- Flr. Fir. Flr. Flr.
540
-
541
- Desired Format:
542
- ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
543
- Owner: <Person's Name>
544
- Address: -||-
545
- Tel No.: -||-
546
- Administrator/Beneficial User: -||-
547
- Address: -||-
548
- Tel No.: -||-
549
- PIN: <Numerical Value Only, Replace Slashes with the number 1>
550
- TIN_1: <Numerical Value Only, Replace Slashes with the number 1>
551
- TIN_2: <Numerical Value Only, Replace Slashes with the number 1>
552
-
553
- 2023-05-11 20:18:39,870 - INFO - ARP No.: 2011-64-0003-00017
554
- Owner: LLANTOS, JULIAN
555
- Address: ALCALA, DARAGA, ALBAY
556
- Tel No.:
557
- Administrator/Beneficial User:
558
- Address:
559
- Tel No.:
560
- PIN: 0310 400301012-100
561
- TIN_1:
562
- TIN_2:
563
- 2023-05-11 20:18:41,647 - INFO - The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels of the desired format, simply extract the text, correct it and return only the desired format. Text: ARP No. 2017-04-0003-00036
564
- OWNER: LISTANA, ROLANDO
565
- Address: ALCAL� DARAGA, ALBAY
566
- Tel No.:
567
- Administrator/Beneficial User:
568
- Address:
569
- Tel No.:
570
- BUILDING LOCATION
571
- No. / Street
572
- Brgy/District
573
- Municipality:
574
- ALCALA
575
- Province/City
576
- REAL PROPERTY FIELD APPRAISAL & ASSESSMENT SHEET - BUILDING & OTHER
577
- STRUCTURES
578
- DARALA
579
- ALBAY
580
- PROPERTY APPRAISAL
581
- Kind of Bldg.
582
- Structural Type V
583
- Bldg. Permit No.
584
- Date Issued
585
- Condominium Certificate of Title(CCT)
586
- Certificate of Completion Issued On:
587
- Certificate of Occupancy Issued On:
588
- Date Constructed/Completed: 1980
589
- Date Occupied:
590
- Tiles
591
- STRUCTURAL MATERIALS (Checklist)
592
- ROOF
593
- Reinforced Concrete
594
- G.I. Sheet
595
- Aluminum
596
- Asbestos
597
- Long Span
598
- Concrete Desk
599
- Nipa/Anahaw/Gogon
600
- Others (Specify)
601
- FLOORING
602
- Reinforced
603
- Concrete
604
- (for upper
605
- floor)
606
- Plain Cement
607
- Marble
608
- Wood
609
- Tiles
610
- Others
611
- (specify)
612
- PIN 0310400 301 024 -100
613
- TIN
614
- TIN
615
- LAND REFERENCE
616
- Owner
617
- LISTANA
618
- OCT/TCT/CLOA No.
619
- Lot No.
620
- Floor Plan:
621
- Attach the building plan sketch of floor plan. A photograph may also be attached if necessary.
622
- TD/ARP No:
623
- Area
624
- I
625
- 2798
626
- Bldg. Age
627
- No. of Storeys
628
- Area of 1st flr:
629
- Area of 2nd flr:
630
- Area of 3rd flr:
631
- Area of 4th flr:
632
- TRANSACTION CODE
633
- Total Floor Area: 125Q-m�
634
- 1st 2nd 3rd 4th
635
- Flr. Flr. Flr. Flr.
636
- MARIAND
637
- *
638
- Walls & 1st 2nd 3rd 4th
639
- Partitions Flr. Flr. Flr. Flr.
640
- Reinforced
641
- Concrete
642
- Plain
643
- Cement
644
- Wood
645
- CHB
646
- G.1 Sheet
647
- Survey No. 4684
648
- Blk No.
649
- Build-a-
650
- wall
651
- Sawali
652
- Bamboo
653
- Others
654
- (Specify)
655
-
656
- Desired Format:
657
- ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
658
- Owner: <Person's Name>
659
- Address: -||-
660
- Tel No.: -||-
661
- Administrator/Beneficial User: -||-
662
- Address: -||-
663
- Tel No.: -||-
664
- PIN: <Numerical Value Only, Replace Slashes with the number 1>
665
- TIN_1: <Numerical Value Only, Replace Slashes with the number 1>
666
- TIN_2: <Numerical Value Only, Replace Slashes with the number 1>
667
-
668
- 2023-05-11 20:18:45,356 - INFO - ARP No.: 2017-04-0003-000361
669
- Owner: ROLANDO LISTANA
670
- Address: ALCALA DARAGA, ALBAY
671
- Tel No.: -||-
672
- Administrator/Beneficial User: -||-
673
- Address: -||-
674
- Tel No.: -||-
675
- PIN: 0310400301024100
676
- TIN_1: -||-
677
- TIN_2: -||-
678
- 2023-05-11 20:18:45,357 - INFO - [{'File Name': 'DARAGA-ALCALA-0017', 'General Information': {'ARP No.': '2017-04-0000-00009', 'Owner': 'Rudy Madrona', 'Address': '', 'Tel No.': '', 'Administrator/Beneficial User': '', 'Address:': '', 'Tel No.:': '', 'PIN': '0310 400 301008-1001', 'TIN_1': '', 'TIN_2': ''}, 'Building Location': {'No. / Street': '', 'Brgy/District': '', 'Municipality': '', 'Province/city': ''}, 'Land Reference': {'Owner': '', 'OCT/TCT/CLOA NO.': '', 'Lot No.': '', 'Survey No.': '', 'Blk No.': '', 'TD/ARP No.:': '', 'Area': ''}, 'Property Appraisal': {'Kind of Bldg': '', 'Structural Type': '', 'Bldg. Permit No.': '', 'Date Issued': '', 'Condominium Certificate of Title(CCT)': '', 'Certificate of Completion Issued on': '', 'Certificate of Occupancy Issued on': '', 'Date Constructed/Completed': '', 'Date Occupied': '', 'Bldg. Age': '', 'No. of Storeys': '', 'Area of 1st Flr': '', 'Area of 2nd Flr': '', 'Area of 3rd Flr': '', 'Area of 4th Flr': '', 'Total Floor Area': ''}}, {'File Name': 'DARAGA-ALCALA-0033', 'General Information': {'ARP No.': '2011-64-0003-00017', 'Owner': 'LLANTOS, JULIAN', 'Address': '', 'Tel No.': '', 'Administrator/Beneficial User': '', 'Address:': '', 'Tel No.:': '', 'PIN': '0310 400301012-100', 'TIN_1': '', 'TIN_2': ''}, 'Building Location': {'No. / Street': '', 'Brgy/District': '', 'Municipality': '', 'Province/city': ''}, 'Land Reference': {'Owner': '', 'OCT/TCT/CLOA NO.': '', 'Lot No.': '', 'Survey No.': '', 'Blk No.': '', 'TD/ARP No.:': '', 'Area': ''}, 'Property Appraisal': {'Kind of Bldg': '', 'Structural Type': '', 'Bldg. Permit No.': '', 'Date Issued': '', 'Condominium Certificate of Title(CCT)': '', 'Certificate of Completion Issued on': '', 'Certificate of Occupancy Issued on': '', 'Date Constructed/Completed': '', 'Date Occupied': '', 'Bldg. Age': '', 'No. 
of Storeys': '', 'Area of 1st Flr': '', 'Area of 2nd Flr': '', 'Area of 3rd Flr': '', 'Area of 4th Flr': '', 'Total Floor Area': ''}}, {'File Name': 'DARAGA-ALCALA-0071', 'General Information': {'ARP No.': '2017-04-0003-000361', 'Owner': 'ROLANDO LISTANA', 'Address': '', 'Tel No.': '', 'Administrator/Beneficial User': '', 'Address:': '', 'Tel No.:': '', 'PIN': '0310400301024100', 'TIN_1': '', 'TIN_2': ''}, 'Building Location': {'No. / Street': '', 'Brgy/District': '', 'Municipality': '', 'Province/city': ''}, 'Land Reference': {'Owner': '', 'OCT/TCT/CLOA NO.': '', 'Lot No.': '', 'Survey No.': '', 'Blk No.': '', 'TD/ARP No.:': '', 'Area': ''}, 'Property Appraisal': {'Kind of Bldg': '', 'Structural Type': '', 'Bldg. Permit No.': '', 'Date Issued': '', 'Condominium Certificate of Title(CCT)': '', 'Certificate of Completion Issued on': '', 'Certificate of Occupancy Issued on': '', 'Date Constructed/Completed': '', 'Date Occupied': '', 'Bldg. Age': '', 'No. of Storeys': '', 'Area of 1st Flr': '', 'Area of 2nd Flr': '', 'Area of 3rd Flr': '', 'Area of 4th Flr': '', 'Total Floor Area': ''}}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -1,188 +1,38 @@
1
- import os
2
  import openai
3
  import gradio as gr
4
- import requests
5
- import datetime
6
- from io import BytesIO
7
- from google.api_core.client_options import ClientOptions
8
- from google.cloud import documentai_v1 as documentai
9
  import json
10
- from google.cloud import vision
11
  import time
12
- from settings import char_remove, gpt_model, RPFAAP2, RPFAAP1, project_id, project_location, processor_id
13
  from tqdm import tqdm
14
  import logging
 
15
  import google
 
 
 
16
 
17
  logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
18
 
19
- def chat_gpt_image(content, context):
20
- openai.api_key = os.environ['GPT_API_KEY']
21
- prompt = "You are an expert at identifying OCR errors and correcting them with the help of context, intuition and logic."
22
- document = "The following text was scanned using OCR, your goal is to return a corrected version of the text"
23
- prefix = "Additionally"
24
- if context == "":
25
- sequence = (document, content)
26
- else:
27
- sequence_1 = (prefix, context)
28
- additional = (" ".join(sequence_1))
29
- sequence = (additional, content)
30
-
31
- final_content = (" ".join(sequence))
32
- logging.info(final_content)
33
- completion = openai.ChatCompletion.create(
34
- model=gpt_model,
35
- user="1",
36
- temperature=0.1,
37
- messages=[
38
- {"role": "system", "content": prompt},
39
- {"role": "user", "content": final_content}
40
- ]
41
- )
42
- logging.info(completion.choices[0].message.content)
43
- return(completion.choices[0].message.content)
44
-
45
- def remove_na(string):
46
- for char in char_remove:
47
- string = string.replace(char, "")
48
- return string
49
-
50
- def chat_gpt_document(content, document_type, context):
51
- openai.api_key = os.environ['GPT_API_KEY']
52
- prompt = "You are an expert at identifying OCR errors and correcting them with the help of context, intuition and logic."
53
- document_prefix = "The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels of the desired format, simply extract the text, correct it and return only the desired format. Text:"
54
- additional_prefix = "Additionally the text"
55
-
56
- content_info = content[0]
57
- content_name = content[1]
58
- if document_type == "RPFAA Building P1":
59
- document = "RPFAAP1.json"
60
- desired_format = RPFAAP1
61
- elif document_type == "RPFAA Building P2":
62
- document = "RPFAAP2.json"
63
- desired_format = RPFAAP2
64
- else:
65
- property_info = ["Please Select a Document Type"]
66
- return json.dumps(property_info, indent=4)
67
-
68
- if context == "":
69
- sequence_1 = (document_prefix, content_info, desired_format)
70
- else:
71
- sequence_1 = (document_prefix, content_info, desired_format, additional_prefix, context)
72
-
73
- content_1 = (" ".join(sequence_1))
74
- logging.info(content_1)
75
-
76
- completion_1 = openai.ChatCompletion.create(
77
- model=gpt_model,
78
- user="1",
79
- temperature=0.1,
80
- messages=[
81
- {"role": "system", "content": prompt},
82
- {"role": "user", "content": content_1}
83
- ]
84
- )
85
- logging.info(completion_1.choices[0].message.content)
86
- input_string = remove_na(completion_1.choices[0].message.content)
87
-
88
- with open(document) as f:
89
- property_info = json.load(f)
90
- #Adds the name of the file
91
- property_info["File Name"] = content_name
92
- #Fills in the information
93
- for line in input_string.split('\n'):
94
- if ':' in line:
95
- key, value = line.split(':', 1)
96
- key = key.strip()
97
- for category in property_info:
98
- if key in property_info[category]:
99
- property_info[category][key] = value.strip()
100
- break
101
- else:
102
- if key == "File Name":
103
- property_info[key] = value.strip()
104
- return json.dumps(property_info, indent=4)
105
-
106
- def detect_image(content, lang):
107
- credentials = json.loads(os.environ['CREDENTIALS'])
108
- temp_file_path = 'temp_credentials.json'
109
- with open(temp_file_path, 'w') as file:
110
- json.dump(credentials, file)
111
- os.environ['GOOGLE_APPLICATION_CREDENTIALS']=r'temp_credentials.json'
112
- client = vision.ImageAnnotatorClient()
113
- buffer = BytesIO()
114
- content.save(buffer, format="PNG")
115
- content = buffer.getvalue()
116
- if lang == "Filpino":
117
- hints = "tl"
118
  else:
119
- hints = "en"
120
- image = vision.Image(content=content)
121
-
122
- response = client.document_text_detection(image=image, image_context={"language_hints": [hints]})
123
-
124
- if response.error.message:
125
- raise Exception(
126
- '{}\nFor more info on error messages, check: '
127
- 'https://cloud.google.com/apis/design/errors'.format(
128
- response.error.message))
129
-
130
- os.remove(temp_file_path)
131
- logging.info(response)
132
- return(response.full_text_annotation.text)
133
-
134
- def detect_document(content):
135
- credentials = json.loads(os.environ['CREDENTIALS'])
136
- temp_file_path = 'temp_credentials.json'
137
- with open(temp_file_path, 'w') as file:
138
- json.dump(credentials, file)
139
- os.environ['GOOGLE_APPLICATION_CREDENTIALS']=r'temp_credentials.json'
140
- PROJECT_ID = project_id
141
- LOCATION = project_location # Format is 'us' or 'eu'
142
- PROCESSOR_ID = processor_id # Create processor in Cloud Console
143
- content_extension = content.name.split(".")[-1]
144
-
145
- if content_extension.upper() == "TIFF":
146
- MIME_TYPE = "image/tiff"
147
- elif content_extension.upper() =="PDF":
148
- MIME_TYPE = "application/pdf"
149
- elif content_extension.upper() =="PNG":
150
- MIME_TYPE = "image/png"
151
- elif content_extension.upper() =="JPG":
152
- MIME_TYPE = "image/jpg"
153
- else:
154
- return("Please upload a valid MIME type")
155
-
156
- docai_client = documentai.DocumentProcessorServiceClient(
157
- client_options=ClientOptions(api_endpoint=f"{LOCATION}-documentai.googleapis.com")
158
- )
159
-
160
- RESOURCE_NAME = docai_client.processor_path(PROJECT_ID, LOCATION, PROCESSOR_ID)
161
 
162
- with open(content.name, "rb") as image:
163
- image_content = image.read()
164
-
165
- raw_document = documentai.RawDocument(content=image_content, mime_type=MIME_TYPE)
166
-
167
- request = documentai.ProcessRequest(name=RESOURCE_NAME, raw_document=raw_document)
168
-
169
- result = docai_client.process_document(request=request)
170
-
171
- document_object = result.document
172
-
173
- name = content.name.split('\\')[-1]
174
- name = name.split("/")[-1]
175
- name = name.split('.')[0]
176
-
177
- os.remove(temp_file_path)
178
-
179
- return(document_object.text, name)
180
-
181
- def image(content, lang, context):
182
- return chat_gpt_image(detect_image(content, lang), context)
183
-
184
- def document(content, document_type, context):
185
- return chat_gpt_document(detect_document(content),document_type,context)
186
 
187
  unprocessed_documents = []
188
  global_document_type = None
@@ -218,8 +68,8 @@ def batch_document(content, document_type, context, progress = gr.Progress()):
218
  else:
219
  progress(0, desc="Starting")
220
  for x in progress.tqdm(content, desc="Processing"):
221
- retries = 3
222
- timeout = 3
223
  i = 0
224
  while True:
225
  try:
@@ -241,39 +91,10 @@ def batch_document(content, document_type, context, progress = gr.Progress()):
241
  if document_type == "":
242
  document_type = "error"
243
  return save_json(combined_data, document_type)
244
-
245
- def retry_unprocessed_documents():
246
- # This function will use the documents stored in unprocessed_documents
247
- # and call batch_document on them
248
- global global_document_type
249
- global global_context
250
- global unprocessed_documents
251
- if unprocessed_documents:
252
- output = batch_document(unprocessed_documents, global_document_type, global_context, "None")
253
- unprocessed_documents = []
254
- return output
255
- else:
256
- unprocessed_documents = []
257
- return save_json("No Unprocessed Documents", "No Unprocessed Documents")
258
-
259
- def save_json(text, filename):
260
- filename = filename+".json"
261
- with open(filename, "w", encoding='utf-8') as outfile:
262
- json.dump(text, outfile, ensure_ascii=False)
263
- return filename
264
-
265
- def combine_json_files(json_files, progress=gr.Progress()):
266
- combined_data = []
267
- progress(0, desc="Starting")
268
- for file in progress.tqdm(json_files, desc="Combining JSON Files"):
269
- with open(file.name, 'r') as json_file:
270
- data = json.load(json_file)
271
- combined_data.extend(data)
272
- # Convert the combined_data dict back to a JSON string
273
- # You might want to save this to a file and return the file,
274
- # or return the JSON string directly
275
- logging.info("Combined JSON File: ", combined_data)
276
- return save_json(combined_data, "Combined Json")
277
 
278
  with gr.Blocks(title="Axon OCR", css=".markdown {text-align: center;}") as app:
279
  gr.Markdown("""# Axon OCR
@@ -282,7 +103,6 @@ with gr.Blocks(title="Axon OCR", css=".markdown {text-align: center;}") as app:
282
  with gr.Row():
283
  with gr.Column():
284
  image_input = [gr.Image(type="pil"),
285
- gr.Radio(["English", "Filipino"], label="Language", info="What is the document language? (Optional)"),
286
  gr.Textbox(label="What kind of Image is this? (Optional)", placeholder="This is an image of an Official Reciept")]
287
  image_output = gr.Textbox(label="Result")
288
  image_button = gr.Button("Scan")
@@ -290,7 +110,7 @@ with gr.Blocks(title="Axon OCR", css=".markdown {text-align: center;}") as app:
290
  with gr.Row():
291
  with gr.Column():
292
  document_input = [gr.File(file_types=["pdf","tiff","image","text"]),
293
- gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2"], label="File Type", info="What type of document is this?"),
294
  gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
295
  document_output = gr.Textbox(label="Result")
296
  document_button = gr.Button("Scan")
@@ -298,7 +118,7 @@ with gr.Blocks(title="Axon OCR", css=".markdown {text-align: center;}") as app:
298
  with gr.Row():
299
  with gr.Column():
300
  batch_document_input = [gr.File(file_types=["pdf","tiff","image","text"], file_count="multiple"),
301
- gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2"], label="File Type", info="What type of document is this?"),
302
  gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
303
  batch_document_output = gr.File(label="Result")
304
  batch_document_button = gr.Button("Scan")
@@ -306,7 +126,7 @@ with gr.Blocks(title="Axon OCR", css=".markdown {text-align: center;}") as app:
306
  with gr.Column():
307
  retry_button = gr.Button("Retry Unprocessed Documents", label="Retry")
308
  with gr.Column():
309
- stop_button = gr.Button("Stop Processing Documents", label="Stop")
310
  with gr.Tab("Combine JSON"):
311
  with gr.Row():
312
  with gr.Column():
@@ -322,4 +142,4 @@ with gr.Blocks(title="Axon OCR", css=".markdown {text-align: center;}") as app:
322
  combine_button.click(combine_json_files, inputs=json_files_input, outputs=combined_json_output)
323
 
324
  app.queue()
325
- app.launch(auth=("username", "password"))
 
 
1
  import openai
2
  import gradio as gr
 
 
 
 
 
3
  import json
 
4
  import time
 
5
  from tqdm import tqdm
6
  import logging
7
+ import requests
8
  import google
9
+ from ocr_functions import detect_document, detect_image
10
+ from ai_functions import chat_gpt_document, chat_gpt_image
11
+ from helpers import save_json
12
 
13
  logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
14
 
15
def retry_unprocessed_documents():
    """Re-submit the documents recorded in ``unprocessed_documents``.

    Reads the module-level ``global_document_type`` and ``global_context``
    and passes them, together with the pending documents, to
    ``batch_document``. The pending list is reset to an empty list afterwards
    in every case.

    Returns:
        Whatever ``batch_document`` returns when there are pending documents;
        otherwise the path returned by ``save_json`` for a placeholder
        "No Unprocessed Documents" file.
    """
    global global_document_type
    global global_context
    global unprocessed_documents

    pending = unprocessed_documents
    if not pending:
        # Nothing to retry: still hand back a file so the caller gets a result.
        unprocessed_documents = []
        return save_json("No Unprocessed Documents", "No Unprocessed Documents")

    # NOTE(review): the progress argument is the literal string "None", not a
    # gr.Progress instance -- confirm batch_document copes with that.
    result = batch_document(pending, global_document_type, global_context, "None")
    unprocessed_documents = []
    return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
def combine_json_files(json_files, progress=gr.Progress()):
    """Concatenate the top-level items of several JSON files into one file.

    Args:
        json_files: Iterable of uploaded-file objects; each must expose a
            ``name`` attribute holding the path of a JSON file on disk.
        progress: Gradio progress tracker used to report per-file progress.

    Returns:
        The path returned by ``save_json`` for the combined "Combined Json"
        file.
    """
    combined_data = []
    progress(0, desc="Starting")
    for file in progress.tqdm(json_files, desc="Combining JSON Files"):
        # save_json writes UTF-8 with ensure_ascii=False, so read it back as
        # UTF-8 instead of relying on the platform default encoding.
        with open(file.name, 'r', encoding='utf-8') as json_file:
            combined_data.extend(json.load(json_file))
    # The data must go through a %s placeholder; passing it as a bare extra
    # argument makes the logging module fail to format the record.
    logging.info("Combined JSON File: %s", combined_data)
    return save_json(combined_data, "Combined Json")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  unprocessed_documents = []
38
  global_document_type = None
 
68
  else:
69
  progress(0, desc="Starting")
70
  for x in progress.tqdm(content, desc="Processing"):
71
+ retries = 1
72
+ timeout = 1
73
  i = 0
74
  while True:
75
  try:
 
91
  if document_type == "":
92
  document_type = "error"
93
  return save_json(combined_data, document_type)
94
def image(content, context):
    """Run ``detect_image`` on *content* and pass the result, with *context*, to ``chat_gpt_image``."""
    recognised = detect_image(content)
    return chat_gpt_image(recognised, context)
96
def document(content, document_type, context):
    """Run ``detect_document`` on *content* and pass the result to ``chat_gpt_document``.

    *document_type* and *context* are forwarded unchanged.
    """
    extracted = detect_document(content)
    return chat_gpt_document(extracted, document_type, context)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
  with gr.Blocks(title="Axon OCR", css=".markdown {text-align: center;}") as app:
100
  gr.Markdown("""# Axon OCR
 
103
  with gr.Row():
104
  with gr.Column():
105
  image_input = [gr.Image(type="pil"),
 
106
  gr.Textbox(label="What kind of Image is this? (Optional)", placeholder="This is an image of an Official Reciept")]
107
  image_output = gr.Textbox(label="Result")
108
  image_button = gr.Button("Scan")
 
110
  with gr.Row():
111
  with gr.Column():
112
  document_input = [gr.File(file_types=["pdf","tiff","image","text"]),
113
+ gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2", "TDRP"], label="File Type", info="What type of document is this?"),
114
  gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
115
  document_output = gr.Textbox(label="Result")
116
  document_button = gr.Button("Scan")
 
118
  with gr.Row():
119
  with gr.Column():
120
  batch_document_input = [gr.File(file_types=["pdf","tiff","image","text"], file_count="multiple"),
121
+ gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2", "TDRP"], label="File Type", info="What type of document is this?"),
122
  gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
123
  batch_document_output = gr.File(label="Result")
124
  batch_document_button = gr.Button("Scan")
 
126
  with gr.Column():
127
  retry_button = gr.Button("Retry Unprocessed Documents", label="Retry")
128
  with gr.Column():
129
+ stop_button = gr.Button("Stop Processing Document", label="Stop")
130
  with gr.Tab("Combine JSON"):
131
  with gr.Row():
132
  with gr.Column():
 
142
  combine_button.click(combine_json_files, inputs=json_files_input, outputs=combined_json_output)
143
 
144
  app.queue()
145
+ app.launch(share=True, auth=("username", "password"))
gr.py DELETED
@@ -1,11 +0,0 @@
1
- import json
2
- import gradio as gr
3
-
4
- def save_json(text):
5
- with open("output.json", "w") as outfile:
6
- json.dump(text, outfile)
7
-
8
- return "output.json"
9
-
10
- demo = gr.Interface(save_text_as_json, "text", "file")
11
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
helpers.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from settings import char_remove
2
+ import re
3
+ import json
4
+ import logging
5
+ logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
6
+
7
def remove_na(string):
    """Return *string* with every entry of ``char_remove`` deleted.

    The entries are removed one after another, in the order they appear in
    ``char_remove`` (imported from ``settings``).
    """
    cleaned = string
    for placeholder in char_remove:
        cleaned = cleaned.replace(placeholder, "")
    return cleaned
11
+
12
def save_json(text, filename):
    """Serialise *text* as JSON into ``<filename>.json`` and return that path.

    The file is written as UTF-8 and non-ASCII characters are kept as-is
    rather than escaped.
    """
    path = filename + ".json"
    serialised = json.dumps(text, ensure_ascii=False)
    with open(path, "w", encoding='utf-8') as outfile:
        outfile.write(serialised)
    return path
17
+
18
def format_polygon(polygon):
    """Render a sequence of points as ``"[x, y], [x, y], ..."``.

    Each point must expose ``x`` and ``y`` attributes. An empty or missing
    polygon is rendered as ``"N/A"``.
    """
    if polygon:
        return ", ".join(f"[{point.x}, {point.y}]" for point in polygon)
    return "N/A"
22
+
23
def filter_tables(input_string, table_numbers):
    """Rebuild selected tables from a textual table dump and return them as JSON.

    The dump is expected to contain ``Table # N`` markers, each followed by
    lines of the form ``Cell[row][col] has content '...'``.

    Args:
        input_string: The table dump text.
        table_numbers: Zero-based positions of the tables to keep, counted in
            order of appearance of the ``Table # N`` markers.

    Returns:
        A JSON object string mapping ``"table_1"``, ``"table_2"``, ... (in the
        order given by *table_numbers*) to a list of rows, each row a list of
        cell strings. Cells that are not mentioned are empty strings; a table
        with no cells becomes an empty list.

    Raises:
        IndexError: If a requested table position does not exist.
    """
    # Everything before the first marker is header text, not a table.
    tables = re.split(r"Table # \d+", input_string)[1:]

    # Anchor the closing quote to the end of the line so that apostrophes
    # inside the cell content are kept instead of cutting the content short.
    cell_pattern = re.compile(
        r"Cell\[(\d+)\]\[(\d+)\] has content '(.*)'[ \t]*$", re.MULTILINE
    )

    json_tables = {}
    for table_counter, table_number in enumerate(table_numbers, start=1):
        cells = [
            (int(row), int(col), content)
            for row, col, content in cell_pattern.findall(tables[table_number])
        ]

        # Grid size is derived from the largest indices seen; guard the empty
        # case because max() of an empty sequence raises ValueError.
        if cells:
            num_rows = max(row for row, _, _ in cells) + 1
            num_cols = max(col for _, col, _ in cells) + 1
        else:
            num_rows = num_cols = 0

        table = [[""] * num_cols for _ in range(num_rows)]
        for row, col, content in cells:
            table[row][col] = content

        json_tables[f"table_{table_counter}"] = table

    return json.dumps(json_tables)
59
+
60
def extract_text_within_range(input_string, x_range, y_range):
    """Return the text of every dumped line that has a vertex inside a box.

    The dump is expected to contain entries of the form
    ``Line # N text '...' within bounding polygon '[x, y], [x, y], [x, y], [x, y]'``.
    Entries whose polygon is not four ``[x, y]`` pairs (for example ``N/A``)
    are ignored.

    Args:
        input_string: The line dump text.
        x_range: ``(min_x, max_x)``, inclusive.
        y_range: ``(min_y, max_y)``, inclusive.

    Returns:
        List of line texts, in order of appearance, for which at least one
        polygon vertex lies within both ranges.
    """
    # ``.*`` (rather than ``[^']*``) keeps lines whose text contains an
    # apostrophe; the fixed " within bounding polygon " tail delimits the text.
    line_pattern = (
        r"Line # \d+ text '(.*)' within bounding polygon "
        r"'(\[[\d.]+, [\d.]+\], \[[\d.]+, [\d.]+\], \[[\d.]+, [\d.]+\], \[[\d.]+, [\d.]+\])'"
    )
    vertex_pattern = r"\[([\d.]+), ([\d.]+)\]"

    output = []
    for text, polygon_str in re.findall(line_pattern, input_string):
        # Parse the numbers directly instead of eval()-ing text from the dump.
        try:
            vertices = [
                (float(x), float(y))
                for x, y in re.findall(vertex_pattern, polygon_str)
            ]
        except ValueError:
            # Malformed number such as "1.2.3": skip this line.
            continue
        if any(
            x_range[0] <= x <= x_range[1] and y_range[0] <= y <= y_range[1]
            for x, y in vertices
        ):
            output.append(text)

    return output
74
+
75
def merge_strings(input_string, input_coords, extract_coords):
    """Overlay ``key: value`` lines and resolve coordinate values to text.

    Steps:
      1. Parse *input_string* and *input_coords* into ``key -> value`` maps.
      2. For keys present in both, the value from *input_coords* wins.
      3. Any value that starts with ``(`` and parses as a pair of points
         ``((x1, y1), (x2, y2))`` is replaced by the comma-joined result of
         ``extract_text_within_range`` over *extract_coords*, or ``-||-`` when
         nothing is found. Values that start with ``(`` but are not such a
         literal (e.g. ``(not provided)``) are left unchanged.

    Args:
        input_string: Newline-separated ``key: value`` lines (base values).
        input_coords: Newline-separated ``key: value`` lines (overrides).
        extract_coords: Line dump passed to ``extract_text_within_range``.

    Returns:
        The merged ``key: value`` lines joined with newlines, in the key order
        of *input_string*. Lines without a key/value separator are dropped.
    """
    import ast  # local import keeps this block self-contained

    def parse_pairs(lines):
        # Split on the first ": " only, so values containing ": " stay whole.
        pairs = {}
        for line in lines:
            key, sep, value = line.partition(": ")
            if not sep:
                stripped = line.rstrip()
                if not stripped.endswith(":"):
                    continue  # blank line or free text: no key to store
                key, value = stripped[:-1], ""  # "Key:" with an empty value
            pairs[key] = value
        return pairs

    lines1 = input_string.split('\n')
    # Filter out empty lines and strip leading/trailing whitespace
    lines2 = [line.strip() for line in input_coords.split('\n') if line.strip()]

    logging.info(lines2)
    dict1 = parse_pairs(lines1)
    dict2 = parse_pairs(lines2)

    # Values from dict2 override dict1 for shared keys only.
    for key in dict1:
        if key in dict2:
            dict1[key] = dict2[key]

    for key, coord_str in dict1.items():
        if not coord_str.startswith('('):
            continue
        # SECURITY: these values come from generated/external text, so parse
        # them as literals only -- never eval().
        try:
            coords = ast.literal_eval(coord_str)
            x_range = (coords[0][0], coords[1][0])
            y_range = (coords[0][1], coords[1][1])
        except (ValueError, SyntaxError, TypeError, IndexError):
            continue  # parenthesised text, not a coordinate pair
        text = extract_text_within_range(extract_coords, x_range, y_range)
        dict1[key] = ', '.join(text) if text else '-||-'

    return '\n'.join([f"{key}: {value}" for key, value in dict1.items()])
ocr_functions.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from azure.core.credentials import AzureKeyCredential
2
+ from azure.ai.formrecognizer import DocumentAnalysisClient
3
+ from io import BytesIO
4
+ from helpers import format_polygon
5
+ import logging
6
+ import os
7
+
8
+ logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
9
+ endpoint = os.environ['AZURE_API_ENDPOINT']
10
+ key = os.environ['AZURE_API_KEY']
11
+
12
def detect_document(content):
    """Analyze an uploaded file with the ``prebuilt-document`` model.

    Args:
        content: Uploaded-file object exposing a ``name`` attribute that holds
            the path of the file on disk.

    Returns:
        A 4-tuple ``(pair_content, document_content, table_content, name)``:
          * ``pair_content`` -- one line per key/value pair that has both a
            key and a value;
          * ``document_content`` -- one line per recognised text line with its
            bounding polygon (as rendered by ``format_polygon``);
          * ``table_content`` -- one header line per table followed by one
            line per cell;
          * ``name`` -- the file's base name up to its first dot.
    """
    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )

    with open(content.name, "rb") as f:
        poller = document_analysis_client.begin_analyze_document(
            "prebuilt-document", document=f
        )
        result = poller.result()

    # NOTE: the exact line formats below are parsed with regular expressions
    # in helpers.py; keep them unchanged.
    pair_lines = ["----Key-value pairs found in document----\n"]
    for kv_pair in result.key_value_pairs:
        if kv_pair.key and kv_pair.value:
            pair_lines.append(
                "Key '{}' with Value '{}' \n".format(
                    kv_pair.key.content,
                    kv_pair.value.content
                )
            )
    pair_content = "".join(pair_lines)
    logging.info(pair_content)

    document_lines = ["----Lines found in document----\n"]
    for page in result.pages:
        for line_idx, line in enumerate(page.lines):
            document_lines.append(
                "...Line # {} text '{}' within bounding polygon '{}' \n".format(
                    line_idx,
                    line.content,
                    format_polygon(line.polygon),
                )
            )
    document_content = "".join(document_lines)
    logging.info(document_content)

    table_lines = ["----Tables found in document----\n"]
    for table_idx, table in enumerate(result.tables):
        table_lines.append(
            "Table # {} has {} rows and {} columns\n".format(
                table_idx, table.row_count, table.column_count
            )
        )
        for cell in table.cells:
            table_lines.append(
                "...Cell[{}][{}] has content '{}'\n".format(
                    cell.row_index,
                    cell.column_index,
                    cell.content,
                )
            )
    table_content = "".join(table_lines)
    logging.info(table_content)

    # Strip the directory for both Windows and POSIX separators; splitting on
    # backslash alone leaves the whole directory in the name on POSIX hosts.
    name = content.name.replace('\\', '/').rsplit('/', 1)[-1]
    name = name.split('.')[0]
    return (pair_content, document_content, table_content, name)
55
+
56
def detect_image(content):
    """Analyze an in-memory image with the ``prebuilt-read`` model.

    Args:
        content: Image object with a ``save(stream, format=...)`` method; it
            is serialised to PNG in memory before being submitted.

    Returns:
        The ``content`` attribute of the analysis result, which is also
        written to the log.
    """
    client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )

    png_buffer = BytesIO()
    content.save(png_buffer, format='PNG')
    png_buffer.seek(0)  # rewind so the upload starts at the first byte

    analysis = client.begin_analyze_document(
        "prebuilt-read", document=png_buffer
    ).result()
    logging.info(analysis.content)
    return analysis.content
output.json DELETED
@@ -1,146 +0,0 @@
1
- [
2
- {
3
- "File Name": "DARAGA-ALCALA-0013",
4
- "General Information": {
5
- "ARP No.": "1017-64-0003-00007",
6
- "Owner": "SAME",
7
- "Address": "",
8
- "Tel No.": "",
9
- "Administrator/Beneficial User": "",
10
- "Address:": "",
11
- "Tel No.:": "",
12
- "PIN": "03/0400301007-1001",
13
- "TIN_1": "",
14
- "TIN_2": ""
15
- },
16
- "Building Location": {
17
- "No. / Street": "",
18
- "Brgy/District": "ALCALA",
19
- "Municipality": "DARAGA",
20
- "Province/city": "ALBAY"
21
- },
22
- "Land Reference": {
23
- "Owner": "",
24
- "OCT/TCT/CLOA NO.": "",
25
- "Lot No.": "",
26
- "Survey No.": "3096-P",
27
- "Blk No.": "",
28
- "TD/ARP No.:": "",
29
- "Area": "+6647 sq.m."
30
- },
31
- "Property Appraisal": {
32
- "Kind of Bldg": "",
33
- "Structural Type": "M-C",
34
- "Bldg. Permit No.": "",
35
- "Date Issued": "",
36
- "Condominium Certificate of Title(CCT)": "",
37
- "Certificate of Completion Issued on": "",
38
- "Certificate of Occupancy Issued on": "",
39
- "Date Constructed/Completed": "",
40
- "Date Occupied": "",
41
- "Bldg. Age": "",
42
- "No. of Storeys": "",
43
- "Area of 1st Flr": "-||-",
44
- "Area of 2nd Flr": "-||-",
45
- "Area of 3rd Flr": "-||-",
46
- "Area of 4th Flr": "-||-",
47
- "Total Floor Area": "12 sq.m."
48
- }
49
- },
50
- {
51
- "File Name": "DARAGA-ALCALA-0017",
52
- "General Information": {
53
- "ARP No.": "2017-04-0000-00009",
54
- "Owner": "Demetrio Madrona",
55
- "Address": "",
56
- "Tel No.": "",
57
- "Administrator/Beneficial User": "",
58
- "Address:": "",
59
- "Tel No.:": "",
60
- "PIN": "0310 400 301008-1001",
61
- "TIN_1": "",
62
- "TIN_2": ""
63
- },
64
- "Building Location": {
65
- "No. / Street": "",
66
- "Brgy/District": "Alcala Daraga",
67
- "Municipality": "Albay",
68
- "Province/city": "Albay"
69
- },
70
- "Land Reference": {
71
- "Owner": "",
72
- "OCT/TCT/CLOA NO.": "",
73
- "Lot No.": "",
74
- "Survey No.": "300",
75
- "Blk No.": "",
76
- "TD/ARP No.:": "",
77
- "Area": "6 SQ.M."
78
- },
79
- "Property Appraisal": {
80
- "Kind of Bldg": "",
81
- "Structural Type": "V",
82
- "Bldg. Permit No.": "",
83
- "Date Issued": "",
84
- "Condominium Certificate of Title(CCT)": "",
85
- "Certificate of Completion Issued on": "",
86
- "Certificate of Occupancy Issued on": "",
87
- "Date Constructed/Completed": "",
88
- "Date Occupied": "",
89
- "Bldg. Age": "",
90
- "No. of Storeys": "",
91
- "Area of 1st Flr": "",
92
- "Area of 2nd Flr": "",
93
- "Area of 3rd Flr": "",
94
- "Area of 4th Flr": "",
95
- "Total Floor Area": "6 SQ.M."
96
- }
97
- },
98
- {
99
- "File Name": "DARAGA-ALCALA-0019",
100
- "General Information": {
101
- "ARP No.": "01 04-00 000 0",
102
- "Owner": "DEMETRIO MADRONA",
103
- "Address": "",
104
- "Tel No.": "",
105
- "Administrator/Beneficial User": "",
106
- "Address:": "",
107
- "Tel No.:": "",
108
- "PIN": "03 400301008 -1002",
109
- "TIN_1": "",
110
- "TIN_2": ""
111
- },
112
- "Building Location": {
113
- "No. / Street": "",
114
- "Brgy/District": "ALCALA",
115
- "Municipality": "DARAGA",
116
- "Province/city": "ALBAY"
117
- },
118
- "Land Reference": {
119
- "Owner": "",
120
- "OCT/TCT/CLOA NO.": "",
121
- "Lot No.": "",
122
- "Survey No.": "",
123
- "Blk No.": "",
124
- "TD/ARP No.:": "",
125
- "Area": "6 SQ.M."
126
- },
127
- "Property Appraisal": {
128
- "Kind of Bldg": "",
129
- "Structural Type": "IV",
130
- "Bldg. Permit No.": "",
131
- "Date Issued": "",
132
- "Condominium Certificate of Title(CCT)": "",
133
- "Certificate of Completion Issued on": "",
134
- "Certificate of Occupancy Issued on": "",
135
- "Date Constructed/Completed": "",
136
- "Date Occupied": "",
137
- "Bldg. Age": "",
138
- "No. of Storeys": "",
139
- "Area of 1st Flr": "",
140
- "Area of 2nd Flr": "",
141
- "Area of 3rd Flr": "",
142
- "Area of 4th Flr": "",
143
- "Total Floor Area": "6 SQ.M."
144
- }
145
- }
146
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
settings.py CHANGED
@@ -1,13 +1,8 @@
1
  #OpenAI Variables
2
  gpt_model = "gpt-3.5-turbo"
3
 
4
- #Google Variables
5
- project_id = "advance-river-381411"
6
- project_location = "us"
7
- processor_id = "31bc9a6106cb3cac"
8
-
9
  #Company Specific Variables
10
- char_remove = ["N/A", "(Use additional sheet if necessary)", "(not provided)", "Not specified", "-||-", "0 sq.m."]
11
 
12
  #Company Documents
13
  RPFAAP2 = '''
@@ -29,9 +24,9 @@ RPFAAP2 = '''
29
  Memoranda: -||-
30
  Date of Entry in the Record of Assessment: -||-
31
  Name: -||-
32
- PIN: <Numerical Value Only, Replace Slashes with the number 1>
33
- ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
34
- TD No.: <Numerical Value Only, Replace Slashes with the number 1>
35
  Total Assessed Value: -||-
36
  Previous Owner: <Person's Name>
37
  Effectivity of Assessment: -||-
@@ -41,16 +36,16 @@ RPFAAP2 = '''
41
 
42
  RPFAAP1 = '''
43
  Desired Format:
44
- ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
45
- Owner: <Person's Name>
46
  Address: -||-
47
  Tel No.: -||-
48
  Administrator/Beneficial User: -||-
49
  Address: -||-
50
  Tel No.: -||-
51
- PIN: <Numerical Value Only, Replace Slashes with the number 1>
52
- TIN_1: <Numerical Value Only, Replace Slashes with the number 1>
53
- TIN_2: <Numerical Value Only, Replace Slashes with the number 1>
54
  No. / Street: -||-
55
  Brgy/District: -||-
56
  Municipality: -||-
@@ -60,10 +55,10 @@ RPFAAP1 = '''
60
  Lot No.: -||-
61
  Survey No.: -||-
62
  Blk No.: -||-
63
- TD/ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
64
  Area: <Area in SQ.M.>
65
  Kind of Bldg.: -||-
66
- Structural Type: -||-
67
  Bldg. Permit No.: -||-
68
  Date Issued: -||-
69
  Condominium Certificate of Title(CCT): -||-
@@ -78,4 +73,51 @@ RPFAAP1 = '''
78
  Area of 3rd Flr: <Numerical Value in SQ.M.>
79
  Area of 4th Flr: <Numerical Value in SQ.M.>
80
  Total Floor Area: <Numerical Value in SQ.M.>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  '''
 
1
  #OpenAI Variables
2
  gpt_model = "gpt-3.5-turbo"
3
 
 
 
 
 
 
4
  #Company Specific Variables
5
+ char_remove = ["N/A", "(Use additional sheet if necessary)", "(not provided)", "Not specified", "-||-", "0 sq.m.", "(not provided in text)"]
6
 
7
  #Company Documents
8
  RPFAAP2 = '''
 
24
  Memoranda: -||-
25
  Date of Entry in the Record of Assessment: -||-
26
  Name: -||-
27
+ PIN: <Numerical Value Only>
28
+ ARP No.: <Numerical Value Only>
29
+ TD No.: <Numerical Value Only>
30
  Total Assessed Value: -||-
31
  Previous Owner: <Person's Name>
32
  Effectivity of Assessment: -||-
 
36
 
37
  RPFAAP1 = '''
38
  Desired Format:
39
+ ARP No.: <Numerical Value Only>
40
+ OWNER: <Person's Name>
41
  Address: -||-
42
  Tel No.: -||-
43
  Administrator/Beneficial User: -||-
44
  Address: -||-
45
  Tel No.: -||-
46
+ PIN: <Numerical Value Only>
47
+ TIN_1: <Numerical Value Only>
48
+ TIN_2: <Numerical Value Only>
49
  No. / Street: -||-
50
  Brgy/District: -||-
51
  Municipality: -||-
 
55
  Lot No.: -||-
56
  Survey No.: -||-
57
  Blk No.: -||-
58
+ TD/ARP No.: <Numerical Value Only>
59
  Area: <Area in SQ.M.>
60
  Kind of Bldg.: -||-
61
+ Structural Type: <Roman Numerals Only>
62
  Bldg. Permit No.: -||-
63
  Date Issued: -||-
64
  Condominium Certificate of Title(CCT): -||-
 
73
  Area of 3rd Flr: <Numerical Value in SQ.M.>
74
  Area of 4th Flr: <Numerical Value in SQ.M.>
75
  Total Floor Area: <Numerical Value in SQ.M.>
76
+ '''
77
+
78
+ TDRP = '''
79
+ Desired Format:
80
+ TD No.: -||-
81
+ Property Identification No.: -||-
82
+ Owner: -||-
83
+ TIN_1: -||-
84
+ Address_1: -||-
85
+ Telephone No._1: -||-
86
+ Administrator/Beneficial User: -||-
87
+ TIN_2: -||-
88
+ Address_2: -||-
89
+ Telephone No._2: -||-
90
+ Number and Street: -||-
91
+ Barangay/District: -||-
92
+ Municipality & Province/City: -||-
93
+ OCT/TCT/CLOA No.: -||-
94
+ Survey No.: -||-
95
+ CCT: -||-
96
+ Lot No.: -||-
97
+ Dated: -||-
98
+ Blk No.: -||-
99
+ North: -||-
100
+ South: -||-
101
+ East: -||-
102
+ West: -||-
103
+ Land: -||-
104
+ Building: -||-
105
+ No. of Storeys: -||-
106
+ Brief Description_1: -||-
107
+ Machinery: -||-
108
+ Brief Description_2: -||-
109
+ Others: -||-
110
+ Specify: -||-
111
+ Total Assessed Value: -||-
112
+ Taxable: -||-
113
+ QTR: -||-
114
+ Year: -||-
115
+ This declaration cancels TD No.: -||-
116
+ Owner: -||-
117
+ Previous A.V. Php: -||-
118
+ Memoranda: -||-
119
+ '''
120
+
121
+ TDRP_COORDS = '''
122
+ North: (193, 580), (600, 640)
123
  '''