Sage
committed on
Commit
•
3dd785b
1
Parent(s):
fd573a1
Big Commit
Browse files- RPFAA Building P1.json +0 -1
- TDRP.json +53 -0
- ai_functions.py +106 -0
- app.log +0 -678
- app.py +33 -213
- gr.py +0 -11
- helpers.py +106 -0
- ocr_functions.py +69 -0
- output.json +0 -146
- requirements.txt +0 -0
- settings.py +58 -16
RPFAA Building P1.json
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
[{"File Name": "DARAGA-ALCALA-0017", "General Information": {"ARP No.": "2017-04-0000-00009", "Owner": "Rudy Madrona", "Address": "", "Tel No.": "", "Administrator/Beneficial User": "", "Address:": "", "Tel No.:": "", "PIN": "0310 400 301008-1001", "TIN_1": "", "TIN_2": ""}, "Building Location": {"No. / Street": "", "Brgy/District": "", "Municipality": "", "Province/city": ""}, "Land Reference": {"Owner": "", "OCT/TCT/CLOA NO.": "", "Lot No.": "", "Survey No.": "", "Blk No.": "", "TD/ARP No.:": "", "Area": ""}, "Property Appraisal": {"Kind of Bldg": "", "Structural Type": "", "Bldg. Permit No.": "", "Date Issued": "", "Condominium Certificate of Title(CCT)": "", "Certificate of Completion Issued on": "", "Certificate of Occupancy Issued on": "", "Date Constructed/Completed": "", "Date Occupied": "", "Bldg. Age": "", "No. of Storeys": "", "Area of 1st Flr": "", "Area of 2nd Flr": "", "Area of 3rd Flr": "", "Area of 4th Flr": "", "Total Floor Area": ""}}, {"File Name": "DARAGA-ALCALA-0033", "General Information": {"ARP No.": "2011-64-0003-00017", "Owner": "LLANTOS, JULIAN", "Address": "", "Tel No.": "", "Administrator/Beneficial User": "", "Address:": "", "Tel No.:": "", "PIN": "0310 400301012-100", "TIN_1": "", "TIN_2": ""}, "Building Location": {"No. / Street": "", "Brgy/District": "", "Municipality": "", "Province/city": ""}, "Land Reference": {"Owner": "", "OCT/TCT/CLOA NO.": "", "Lot No.": "", "Survey No.": "", "Blk No.": "", "TD/ARP No.:": "", "Area": ""}, "Property Appraisal": {"Kind of Bldg": "", "Structural Type": "", "Bldg. Permit No.": "", "Date Issued": "", "Condominium Certificate of Title(CCT)": "", "Certificate of Completion Issued on": "", "Certificate of Occupancy Issued on": "", "Date Constructed/Completed": "", "Date Occupied": "", "Bldg. Age": "", "No. 
of Storeys": "", "Area of 1st Flr": "", "Area of 2nd Flr": "", "Area of 3rd Flr": "", "Area of 4th Flr": "", "Total Floor Area": ""}}, {"File Name": "DARAGA-ALCALA-0071", "General Information": {"ARP No.": "2017-04-0003-000361", "Owner": "ROLANDO LISTANA", "Address": "", "Tel No.": "", "Administrator/Beneficial User": "", "Address:": "", "Tel No.:": "", "PIN": "0310400301024100", "TIN_1": "", "TIN_2": ""}, "Building Location": {"No. / Street": "", "Brgy/District": "", "Municipality": "", "Province/city": ""}, "Land Reference": {"Owner": "", "OCT/TCT/CLOA NO.": "", "Lot No.": "", "Survey No.": "", "Blk No.": "", "TD/ARP No.:": "", "Area": ""}, "Property Appraisal": {"Kind of Bldg": "", "Structural Type": "", "Bldg. Permit No.": "", "Date Issued": "", "Condominium Certificate of Title(CCT)": "", "Certificate of Completion Issued on": "", "Certificate of Occupancy Issued on": "", "Date Constructed/Completed": "", "Date Occupied": "", "Bldg. Age": "", "No. of Storeys": "", "Area of 1st Flr": "", "Area of 2nd Flr": "", "Area of 3rd Flr": "", "Area of 4th Flr": "", "Total Floor Area": ""}}]
|
|
|
|
TDRP.json
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"File Name": "%s",
|
2 |
+
"General Information": {
|
3 |
+
"TD No.": "",
|
4 |
+
"Property Identification No.": "",
|
5 |
+
"Owner": "",
|
6 |
+
"TIN_1": "",
|
7 |
+
"Address_1": "",
|
8 |
+
"Telephone No._1": "",
|
9 |
+
"Administrator/Beneficial User": "",
|
10 |
+
"TIN_2": "",
|
11 |
+
"Address_2": "",
|
12 |
+
"Telephone No._2": ""
|
13 |
+
},
|
14 |
+
"Location of Property": {
|
15 |
+
"Number and Street": "",
|
16 |
+
"Barangay/District": "",
|
17 |
+
"Municipality & Province/City": ""
|
18 |
+
},
|
19 |
+
"Land Reference": {
|
20 |
+
"OCT/TCT/CLOA No.": "",
|
21 |
+
"Survey No.": "",
|
22 |
+
"CCT": "",
|
23 |
+
"Lot No.": "",
|
24 |
+
"Dated": "",
|
25 |
+
"Blk No.": ""
|
26 |
+
},
|
27 |
+
"Boundaries": {
|
28 |
+
"North": "",
|
29 |
+
"South": "",
|
30 |
+
"East": "",
|
31 |
+
"West": ""
|
32 |
+
},
|
33 |
+
"Kind of Property Assessed": {
|
34 |
+
"Land": "",
|
35 |
+
"Building": "",
|
36 |
+
"No. of Storeys": "",
|
37 |
+
"Brief Description_1": "",
|
38 |
+
"Machinery": "",
|
39 |
+
"Brief Description_2": "",
|
40 |
+
"Others": "",
|
41 |
+
"Specify": ""
|
42 |
+
},
|
43 |
+
"Property Assesment": {
|
44 |
+
"Total Assessed Value": "",
|
45 |
+
"Taxable": "",
|
46 |
+
"QTR": "",
|
47 |
+
"Year": "",
|
48 |
+
"This declaration cancels TD No.": "",
|
49 |
+
"Owner": "",
|
50 |
+
"Previous A.V. Php": "",
|
51 |
+
"Memoranda": ""
|
52 |
+
}
|
53 |
+
}
|
ai_functions.py
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from settings import gpt_api_key, gpt_model, RPFAAP2, RPFAAP1, TDRP, TDRP_COORDS
|
2 |
+
import openai
|
3 |
+
import json
|
4 |
+
import logging
|
5 |
+
from helpers import remove_na, filter_tables, merge_strings
|
6 |
+
import os
|
7 |
+
# Send INFO-and-above records to app.log with a timestamp and level prefix.
# The functions below log the full prompt and the full model reply, so this
# file ends up containing the scanned document text verbatim.
logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
8 |
+
|
9 |
+
def chat_gpt_image(content, context):
    """Ask the chat model for a corrected version of OCR-scanned text.

    Args:
        content: The OCR text to correct (a string; it is joined into the
            prompt).
        context: Extra guidance for the model, or "" for none. When given it
            is appended to the base instruction after the word
            "Additionally".

    Returns:
        The message content of the first completion choice.

    Raises:
        KeyError: If the GPT_API_KEY environment variable is not set.
    """
    openai.api_key = os.environ['GPT_API_KEY']
    prompt = "You are an expert at identifying OCR errors and correcting them with the help of context, intuition and logic."
    document = "The following text was scanned using OCR, your goal is to return a corrected version of the text"
    prefix = "Additionally"

    # Always lead with the base instruction. Previously a non-empty context
    # replaced it, so the model received "Additionally <context> <text>"
    # with no statement of the actual task.
    if context == "":
        parts = (document, content)
    else:
        parts = (document, prefix, context, content)

    final_content = " ".join(parts)
    logging.info(final_content)
    completion = openai.ChatCompletion.create(
        model=gpt_model,
        user="1",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": final_content}
        ]
    )
    reply = completion.choices[0].message.content
    logging.info(reply)
    return reply
|
33 |
+
|
34 |
+
def chat_gpt_document(content, document_type, context):
    """Extract the fields of a scanned form into a JSON string.

    The OCR text is sent to the chat model together with the desired output
    format for the selected document type. The "key: value" lines of the
    reply are then copied into the matching fields of that document type's
    JSON template, and the filtered table data is merged in on top.

    Args:
        content: Indexable with at least four items:
            [0] OCR text that is placed in the prompt (a string),
            [1] data handed to ``merge_strings`` together with the
                coordinates,
            [2] table data handed to ``filter_tables``,
            [3] the name stored under "File Name".
        document_type: One of "RPFAA Building P1", "RPFAA Building P2" or
            "TDRP".
        context: Extra guidance for the model, or "" for none.

    Returns:
        The populated template as an indented JSON string. For an unknown
        ``document_type`` the result is a JSON list holding a single
        "Please Select a Document Type" message and no request is made.

    Raises:
        KeyError: If the GPT_API_KEY environment variable is not set.
    """
    openai.api_key = os.environ['GPT_API_KEY']
    prompt = "You are an expert at identifying OCR errors and correcting them with the help of context, intuition and logic."
    # NOTE(review): the sentence "...fields that don't have the , simply..."
    # is missing a word; left untouched because the intended wording cannot
    # be determined from this file.
    document_prefix = "The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels and do not populate fields that don't have the , simply extract the text, correct it and return only the desired format. Leave the field blank if it cannot be found in the text. Text:"
    additional_prefix = "Additionally the text"

    pair_content = content[0]
    document_content = content[1]
    table_content = content[2]
    content_name = content[3]

    # document type -> (template file, prompt format, tables, coordinates)
    # NOTE(review): every document type uses TDRP_COORDS; confirm this is
    # intended for the two RPFAA forms.
    # The `tables` lists are passed straight to filter_tables; presumably
    # they select which detected tables to keep -- verify against helpers.
    document_settings = {
        "RPFAA Building P1": ("RPFAAP1.json", RPFAAP1, [3], TDRP_COORDS),
        "RPFAA Building P2": ("RPFAAP2.json", RPFAAP2, [], TDRP_COORDS),
        "TDRP": ("TDRP.json", TDRP, [0], TDRP_COORDS),
    }
    selected = document_settings.get(document_type)
    if selected is None:
        return json.dumps(["Please Select a Document Type"], indent=4)
    document, desired_format, tables, input_coords = selected

    if context == "":
        prompt_parts = (document_prefix, pair_content, desired_format)
    else:
        prompt_parts = (document_prefix, pair_content, desired_format, additional_prefix, context)

    content_1 = " ".join(prompt_parts)
    logging.info(content_1)

    completion_1 = openai.ChatCompletion.create(
        model=gpt_model,
        user="1",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": content_1}
        ]
    )
    reply = completion_1.choices[0].message.content
    logging.info(reply)
    input_string = remove_na(reply)
    input_string = merge_strings(input_string, input_coords, document_content)

    # The template path is relative, so it is resolved against the current
    # working directory of the process.
    with open(document, encoding="utf-8") as f:
        property_info = json.load(f)

    # Adds the name of the file
    property_info["File Name"] = content_name

    # Fills in the information
    for line in input_string.split('\n'):
        if ':' not in line:
            continue
        key, value = line.split(':', 1)
        key = key.strip()
        value = value.strip()
        for section in property_info.values():
            # Only nested sections hold fields. "File Name" maps to a plain
            # string, where `in` would be a substring test and the item
            # assignment below would raise TypeError.
            if isinstance(section, dict) and key in section:
                section[key] = value
                break
        else:
            # No section owns this key; the only top-level field the reply
            # may set is the file name itself.
            if key == "File Name":
                property_info[key] = value

    table_string = filter_tables(table_content, tables)
    table_dict = json.loads(table_string)
    property_info.update(table_dict)
    return json.dumps(property_info, indent=4)
|
app.log
CHANGED
@@ -1,678 +0,0 @@
|
|
1 |
-
2023-05-11 19:13:33,773 - INFO - The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels of the desired format, simply extract the text, correct it and return only the desired format. Text: ARP No. 2017-04-0000-00009
|
2 |
-
OWNER: RUDY MADRONA
|
3 |
-
Address: ALCALA DARAGA, ALBAY
|
4 |
-
Tel No.:
|
5 |
-
Administrator/Beneficial User:
|
6 |
-
Address:
|
7 |
-
Tel No.:
|
8 |
-
BUILDING LOCATION
|
9 |
-
No. / Street
|
10 |
-
Brgy/District
|
11 |
-
ALCALA
|
12 |
-
DA RAGA
|
13 |
-
Municipality:
|
14 |
-
Province/City
|
15 |
-
ALBAY
|
16 |
-
PROPERTY APPRAISAL
|
17 |
-
Kind of Bldg.
|
18 |
-
Structural Type V
|
19 |
-
Bldg. Permit No.
|
20 |
-
Date Issued
|
21 |
-
Condominium Certificate of Title(CCT)
|
22 |
-
Certificate of Completion Issued On:
|
23 |
-
Certificate of Occupancy Issued On:
|
24 |
-
Date Constructed/Completed:
|
25 |
-
Date Occupied:
|
26 |
-
REAL PROPERTY FIELD APPRAISAL & ASSESSMENT SHEET - BUILDING & OTHER
|
27 |
-
STRUCTURES
|
28 |
-
PIN 0310 400 301008 -1001
|
29 |
-
TIN
|
30 |
-
Tiles
|
31 |
-
STRUCTURAL MATERIALS (Checklist)
|
32 |
-
ROOF
|
33 |
-
Reinforced Concrete
|
34 |
-
G.I. Sheet
|
35 |
-
Aluminum
|
36 |
-
Asbestos
|
37 |
-
Long Span
|
38 |
-
Concrete Desk
|
39 |
-
Nipa/Anahaw/Gogon
|
40 |
-
Others (Specify)
|
41 |
-
FLOORING
|
42 |
-
Reinforced
|
43 |
-
Concrete
|
44 |
-
(for upper
|
45 |
-
floor)
|
46 |
-
Plain Cement
|
47 |
-
TIN
|
48 |
-
Floor Plan:
|
49 |
-
Attach the building plan sketch of floor plan. A photograph may also be attached if necessary.
|
50 |
-
Marble
|
51 |
-
Wood
|
52 |
-
Tiles
|
53 |
-
Others
|
54 |
-
(specify)
|
55 |
-
LAND REFERENCE
|
56 |
-
Owner
|
57 |
-
OCT/TCT/CLOA No.
|
58 |
-
Lot No.
|
59 |
-
MADRONA, DEMETRIO
|
60 |
-
TD/ARP No:
|
61 |
-
Area
|
62 |
-
Bldg. Age
|
63 |
-
No. of Storeys
|
64 |
-
Area of 1st flr:
|
65 |
-
Area of 2nd flr:
|
66 |
-
Area of 3rd flr:
|
67 |
-
Area of 4th flr:
|
68 |
-
TRANSACTION CODE
|
69 |
-
Total Floor Area: 6 SQ.M.
|
70 |
-
1st 2nd 3rd 4th
|
71 |
-
Flr. Flr. Flr. Flr.
|
72 |
-
Plain
|
73 |
-
Cement
|
74 |
-
Wood
|
75 |
-
Walls & 1st 2nd 3rd 4th
|
76 |
-
Partitions Flr. Flr. Flr. Flr.
|
77 |
-
Reinforced
|
78 |
-
Concrete
|
79 |
-
CHB
|
80 |
-
G.I Sheet
|
81 |
-
Survey No. 300
|
82 |
-
Blk No.
|
83 |
-
Build-a-
|
84 |
-
wall
|
85 |
-
Sawali
|
86 |
-
Bamboo
|
87 |
-
Others
|
88 |
-
(Specify)
|
89 |
-
|
90 |
-
Desired Format:
|
91 |
-
ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
|
92 |
-
Owner: <Person's Name>
|
93 |
-
Address: -||-
|
94 |
-
Tel No.: -||-
|
95 |
-
Administrator/Beneficial User: -||-
|
96 |
-
Address: -||-
|
97 |
-
Tel No.: -||-
|
98 |
-
PIN: <Numerical Value Only, Replace Slashes with the number 1>
|
99 |
-
TIN_1: <Numerical Value Only, Replace Slashes with the number 1>
|
100 |
-
TIN_2: <Numerical Value Only, Replace Slashes with the number 1>
|
101 |
-
|
102 |
-
2023-05-11 19:13:37,266 - INFO - ARP No.: 2017-04-0000-00009
|
103 |
-
Owner: RUDY MADRONA
|
104 |
-
Address: ALCALA DARAGA, ALBAY
|
105 |
-
Tel No.:
|
106 |
-
Administrator/Beneficial User: -||-
|
107 |
-
Address: -||-
|
108 |
-
Tel No.: -||-
|
109 |
-
PIN: 0310 400 301008-1001
|
110 |
-
TIN_1: -||-
|
111 |
-
TIN_2: -||-
|
112 |
-
2023-05-11 19:13:38,952 - INFO - The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels of the desired format, simply extract the text, correct it and return only the desired format. Text: ARP Np. 2011-64-0003-00017
|
113 |
-
OWNER: LLANTOS, JULIAN
|
114 |
-
Address: ALCALA, DARAGA, AMBAY
|
115 |
-
Tel No.:
|
116 |
-
Administrator/Beneficial User:
|
117 |
-
Address:
|
118 |
-
Tel No.:
|
119 |
-
BUILDING LOCATION
|
120 |
-
No. / Street
|
121 |
-
Brgy/District
|
122 |
-
ALCALA
|
123 |
-
DARAGA
|
124 |
-
ALBAY
|
125 |
-
Municipality:
|
126 |
-
Province/City
|
127 |
-
PROPERTY APPRAISAL
|
128 |
-
Kind of Bldg.
|
129 |
-
Structural Type 111-C
|
130 |
-
Bldg. Permit No.
|
131 |
-
Date Issued
|
132 |
-
Condominium Certificate of Title(CCT)
|
133 |
-
Certificate of Completion Issued On:
|
134 |
-
Certificate of Occupancy Issued On:
|
135 |
-
REAL PROPERTY FIELD APPRAISAL & ASSESSMENT SHEET - BUILDING & OTHER
|
136 |
-
STRUCTURES
|
137 |
-
PIN 0310 400301012-100)
|
138 |
-
TIN
|
139 |
-
Date Constructed/Completed:
|
140 |
-
Date Occupied:
|
141 |
-
STRUCTURAL MATERIALS (Checklist)
|
142 |
-
Tiles
|
143 |
-
ROOF
|
144 |
-
Reinforced Concrete
|
145 |
-
G.I. Sheet
|
146 |
-
Aluminum
|
147 |
-
Asbestos
|
148 |
-
Long Span
|
149 |
-
Concrete Desk
|
150 |
-
Nipa/Anahaw/Gogon
|
151 |
-
Others (Specify)
|
152 |
-
Floor Plan:
|
153 |
-
Attach the building plan sketch of floor plan. A photograph may also be attached if necessary.
|
154 |
-
FLOORING
|
155 |
-
Reinforced
|
156 |
-
Concrete
|
157 |
-
(for upper
|
158 |
-
floor)
|
159 |
-
Plain Cement
|
160 |
-
Marble
|
161 |
-
Wood
|
162 |
-
Tiles
|
163 |
-
TIN
|
164 |
-
Others
|
165 |
-
(specify)
|
166 |
-
LAND REFERENCE
|
167 |
-
Owner
|
168 |
-
OCT/TCT/CLOA No.
|
169 |
-
Lot No. 3095-P
|
170 |
-
TD/ARP No:
|
171 |
-
Area
|
172 |
-
Bldg. Age
|
173 |
-
No. of Storeys
|
174 |
-
Area of 1st flr:
|
175 |
-
Area of 2nd flr:
|
176 |
-
Area of 3rd flr:
|
177 |
-
Area of 4th flr:
|
178 |
-
Total Floor Area: 49 SQ.M.
|
179 |
-
1st 2nd 3rd 4th
|
180 |
-
Flr. Flr. Flr. Flr.
|
181 |
-
TRANSACTION CODE
|
182 |
-
Walls &
|
183 |
-
Partitions
|
184 |
-
Reinforced
|
185 |
-
Concrete
|
186 |
-
Plain
|
187 |
-
Cement
|
188 |
-
Wood
|
189 |
-
CHB
|
190 |
-
G.I Sheet
|
191 |
-
Build-a-
|
192 |
-
wall
|
193 |
-
Sawali
|
194 |
-
Bamboo
|
195 |
-
Others
|
196 |
-
(Specify)
|
197 |
-
Survey No.
|
198 |
-
Bik No.
|
199 |
-
1st 2nd 3rd 4th
|
200 |
-
Flr. Fir. Flr. Flr.
|
201 |
-
|
202 |
-
Desired Format:
|
203 |
-
ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
|
204 |
-
Owner: <Person's Name>
|
205 |
-
Address: -||-
|
206 |
-
Tel No.: -||-
|
207 |
-
Administrator/Beneficial User: -||-
|
208 |
-
Address: -||-
|
209 |
-
Tel No.: -||-
|
210 |
-
PIN: <Numerical Value Only, Replace Slashes with the number 1>
|
211 |
-
TIN_1: <Numerical Value Only, Replace Slashes with the number 1>
|
212 |
-
TIN_2: <Numerical Value Only, Replace Slashes with the number 1>
|
213 |
-
|
214 |
-
2023-05-11 19:13:42,282 - INFO - ARP No.: 2011-64-0003-00017
|
215 |
-
Owner: JULIAN LLANTOS
|
216 |
-
Address: ALCALA, DARAGA, ALBAY
|
217 |
-
Tel No.: -||-
|
218 |
-
Administrator/Beneficial User: -||-
|
219 |
-
Address: -||-
|
220 |
-
Tel No.: -||-
|
221 |
-
PIN: 0310400301012100
|
222 |
-
TIN_1: -||-
|
223 |
-
TIN_2: -||-
|
224 |
-
2023-05-11 19:13:44,006 - INFO - The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels of the desired format, simply extract the text, correct it and return only the desired format. Text: ARP No. 2017-04-0003-00036
|
225 |
-
OWNER: LISTANA, ROLANDO
|
226 |
-
Address: ALCAL� DARAGA, ALBAY
|
227 |
-
Tel No.:
|
228 |
-
Administrator/Beneficial User:
|
229 |
-
Address:
|
230 |
-
Tel No.:
|
231 |
-
BUILDING LOCATION
|
232 |
-
No. / Street
|
233 |
-
Brgy/District
|
234 |
-
Municipality:
|
235 |
-
ALCALA
|
236 |
-
Province/City
|
237 |
-
REAL PROPERTY FIELD APPRAISAL & ASSESSMENT SHEET - BUILDING & OTHER
|
238 |
-
STRUCTURES
|
239 |
-
DARALA
|
240 |
-
ALBAY
|
241 |
-
PROPERTY APPRAISAL
|
242 |
-
Kind of Bldg.
|
243 |
-
Structural Type V
|
244 |
-
Bldg. Permit No.
|
245 |
-
Date Issued
|
246 |
-
Condominium Certificate of Title(CCT)
|
247 |
-
Certificate of Completion Issued On:
|
248 |
-
Certificate of Occupancy Issued On:
|
249 |
-
Date Constructed/Completed: 1980
|
250 |
-
Date Occupied:
|
251 |
-
Tiles
|
252 |
-
STRUCTURAL MATERIALS (Checklist)
|
253 |
-
ROOF
|
254 |
-
Reinforced Concrete
|
255 |
-
G.I. Sheet
|
256 |
-
Aluminum
|
257 |
-
Asbestos
|
258 |
-
Long Span
|
259 |
-
Concrete Desk
|
260 |
-
Nipa/Anahaw/Gogon
|
261 |
-
Others (Specify)
|
262 |
-
FLOORING
|
263 |
-
Reinforced
|
264 |
-
Concrete
|
265 |
-
(for upper
|
266 |
-
floor)
|
267 |
-
Plain Cement
|
268 |
-
Marble
|
269 |
-
Wood
|
270 |
-
Tiles
|
271 |
-
Others
|
272 |
-
(specify)
|
273 |
-
PIN 0310400 301 024 -100
|
274 |
-
TIN
|
275 |
-
TIN
|
276 |
-
LAND REFERENCE
|
277 |
-
Owner
|
278 |
-
LISTANA
|
279 |
-
OCT/TCT/CLOA No.
|
280 |
-
Lot No.
|
281 |
-
Floor Plan:
|
282 |
-
Attach the building plan sketch of floor plan. A photograph may also be attached if necessary.
|
283 |
-
TD/ARP No:
|
284 |
-
Area
|
285 |
-
I
|
286 |
-
2798
|
287 |
-
Bldg. Age
|
288 |
-
No. of Storeys
|
289 |
-
Area of 1st flr:
|
290 |
-
Area of 2nd flr:
|
291 |
-
Area of 3rd flr:
|
292 |
-
Area of 4th flr:
|
293 |
-
TRANSACTION CODE
|
294 |
-
Total Floor Area: 125Q-m�
|
295 |
-
1st 2nd 3rd 4th
|
296 |
-
Flr. Flr. Flr. Flr.
|
297 |
-
MARIAND
|
298 |
-
*
|
299 |
-
Walls & 1st 2nd 3rd 4th
|
300 |
-
Partitions Flr. Flr. Flr. Flr.
|
301 |
-
Reinforced
|
302 |
-
Concrete
|
303 |
-
Plain
|
304 |
-
Cement
|
305 |
-
Wood
|
306 |
-
CHB
|
307 |
-
G.1 Sheet
|
308 |
-
Survey No. 4684
|
309 |
-
Blk No.
|
310 |
-
Build-a-
|
311 |
-
wall
|
312 |
-
Sawali
|
313 |
-
Bamboo
|
314 |
-
Others
|
315 |
-
(Specify)
|
316 |
-
|
317 |
-
Desired Format:
|
318 |
-
ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
|
319 |
-
Owner: <Person's Name>
|
320 |
-
Address: -||-
|
321 |
-
Tel No.: -||-
|
322 |
-
Administrator/Beneficial User: -||-
|
323 |
-
Address: -||-
|
324 |
-
Tel No.: -||-
|
325 |
-
PIN: <Numerical Value Only, Replace Slashes with the number 1>
|
326 |
-
TIN_1: <Numerical Value Only, Replace Slashes with the number 1>
|
327 |
-
TIN_2: <Numerical Value Only, Replace Slashes with the number 1>
|
328 |
-
|
329 |
-
2023-05-11 19:13:48,089 - INFO - ARP No.: 2017-04-0003-00036
|
330 |
-
Owner: ROLANDO LISTANA
|
331 |
-
Address: ALCALA DARAGA, ALBAY
|
332 |
-
Tel No.: -||-
|
333 |
-
Administrator/Beneficial User: -||-
|
334 |
-
Address: -||-
|
335 |
-
Tel No.: -||-
|
336 |
-
PIN: 0310400 301 024-100
|
337 |
-
TIN_1: -||- (no value provided)
|
338 |
-
TIN_2: -||- (no value provided)
|
339 |
-
2023-05-11 19:13:48,089 - INFO - [{'File Name': 'DARAGA-ALCALA-0017', 'General Information': {'ARP No.': '2017-04-0000-00009', 'Owner': 'RUDY MADRONA', 'Address': '', 'Tel No.': '', 'Administrator/Beneficial User': '', 'Address:': '', 'Tel No.:': '', 'PIN': '0310 400 301008-1001', 'TIN_1': '', 'TIN_2': ''}, 'Building Location': {'No. / Street': '', 'Brgy/District': '', 'Municipality': '', 'Province/city': ''}, 'Land Reference': {'Owner': '', 'OCT/TCT/CLOA NO.': '', 'Lot No.': '', 'Survey No.': '', 'Blk No.': '', 'TD/ARP No.:': '', 'Area': ''}, 'Property Appraisal': {'Kind of Bldg': '', 'Structural Type': '', 'Bldg. Permit No.': '', 'Date Issued': '', 'Condominium Certificate of Title(CCT)': '', 'Certificate of Completion Issued on': '', 'Certificate of Occupancy Issued on': '', 'Date Constructed/Completed': '', 'Date Occupied': '', 'Bldg. Age': '', 'No. of Storeys': '', 'Area of 1st Flr': '', 'Area of 2nd Flr': '', 'Area of 3rd Flr': '', 'Area of 4th Flr': '', 'Total Floor Area': ''}}, {'File Name': 'DARAGA-ALCALA-0033', 'General Information': {'ARP No.': '2011-64-0003-00017', 'Owner': 'JULIAN LLANTOS', 'Address': '', 'Tel No.': '', 'Administrator/Beneficial User': '', 'Address:': '', 'Tel No.:': '', 'PIN': '0310400301012100', 'TIN_1': '', 'TIN_2': ''}, 'Building Location': {'No. / Street': '', 'Brgy/District': '', 'Municipality': '', 'Province/city': ''}, 'Land Reference': {'Owner': '', 'OCT/TCT/CLOA NO.': '', 'Lot No.': '', 'Survey No.': '', 'Blk No.': '', 'TD/ARP No.:': '', 'Area': ''}, 'Property Appraisal': {'Kind of Bldg': '', 'Structural Type': '', 'Bldg. Permit No.': '', 'Date Issued': '', 'Condominium Certificate of Title(CCT)': '', 'Certificate of Completion Issued on': '', 'Certificate of Occupancy Issued on': '', 'Date Constructed/Completed': '', 'Date Occupied': '', 'Bldg. Age': '', 'No. 
of Storeys': '', 'Area of 1st Flr': '', 'Area of 2nd Flr': '', 'Area of 3rd Flr': '', 'Area of 4th Flr': '', 'Total Floor Area': ''}}, {'File Name': 'DARAGA-ALCALA-0071', 'General Information': {'ARP No.': '2017-04-0003-00036', 'Owner': 'ROLANDO LISTANA', 'Address': '', 'Tel No.': '', 'Administrator/Beneficial User': '', 'Address:': '', 'Tel No.:': '', 'PIN': '0310400 301 024-100', 'TIN_1': '(no value provided)', 'TIN_2': '(no value provided)'}, 'Building Location': {'No. / Street': '', 'Brgy/District': '', 'Municipality': '', 'Province/city': ''}, 'Land Reference': {'Owner': '', 'OCT/TCT/CLOA NO.': '', 'Lot No.': '', 'Survey No.': '', 'Blk No.': '', 'TD/ARP No.:': '', 'Area': ''}, 'Property Appraisal': {'Kind of Bldg': '', 'Structural Type': '', 'Bldg. Permit No.': '', 'Date Issued': '', 'Condominium Certificate of Title(CCT)': '', 'Certificate of Completion Issued on': '', 'Certificate of Occupancy Issued on': '', 'Date Constructed/Completed': '', 'Date Occupied': '', 'Bldg. Age': '', 'No. of Storeys': '', 'Area of 1st Flr': '', 'Area of 2nd Flr': '', 'Area of 3rd Flr': '', 'Area of 4th Flr': '', 'Total Floor Area': ''}}]
|
340 |
-
2023-05-11 20:18:32,352 - INFO - The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels of the desired format, simply extract the text, correct it and return only the desired format. Text: ARP No. 2017-04-0000-00009
|
341 |
-
OWNER: RUDY MADRONA
|
342 |
-
Address: ALCALA DARAGA, ALBAY
|
343 |
-
Tel No.:
|
344 |
-
Administrator/Beneficial User:
|
345 |
-
Address:
|
346 |
-
Tel No.:
|
347 |
-
BUILDING LOCATION
|
348 |
-
No. / Street
|
349 |
-
Brgy/District
|
350 |
-
ALCALA
|
351 |
-
DA RAGA
|
352 |
-
Municipality:
|
353 |
-
Province/City
|
354 |
-
ALBAY
|
355 |
-
PROPERTY APPRAISAL
|
356 |
-
Kind of Bldg.
|
357 |
-
Structural Type V
|
358 |
-
Bldg. Permit No.
|
359 |
-
Date Issued
|
360 |
-
Condominium Certificate of Title(CCT)
|
361 |
-
Certificate of Completion Issued On:
|
362 |
-
Certificate of Occupancy Issued On:
|
363 |
-
Date Constructed/Completed:
|
364 |
-
Date Occupied:
|
365 |
-
REAL PROPERTY FIELD APPRAISAL & ASSESSMENT SHEET - BUILDING & OTHER
|
366 |
-
STRUCTURES
|
367 |
-
PIN 0310 400 301008 -1001
|
368 |
-
TIN
|
369 |
-
Tiles
|
370 |
-
STRUCTURAL MATERIALS (Checklist)
|
371 |
-
ROOF
|
372 |
-
Reinforced Concrete
|
373 |
-
G.I. Sheet
|
374 |
-
Aluminum
|
375 |
-
Asbestos
|
376 |
-
Long Span
|
377 |
-
Concrete Desk
|
378 |
-
Nipa/Anahaw/Gogon
|
379 |
-
Others (Specify)
|
380 |
-
FLOORING
|
381 |
-
Reinforced
|
382 |
-
Concrete
|
383 |
-
(for upper
|
384 |
-
floor)
|
385 |
-
Plain Cement
|
386 |
-
TIN
|
387 |
-
Floor Plan:
|
388 |
-
Attach the building plan sketch of floor plan. A photograph may also be attached if necessary.
|
389 |
-
Marble
|
390 |
-
Wood
|
391 |
-
Tiles
|
392 |
-
Others
|
393 |
-
(specify)
|
394 |
-
LAND REFERENCE
|
395 |
-
Owner
|
396 |
-
OCT/TCT/CLOA No.
|
397 |
-
Lot No.
|
398 |
-
MADRONA, DEMETRIO
|
399 |
-
TD/ARP No:
|
400 |
-
Area
|
401 |
-
Bldg. Age
|
402 |
-
No. of Storeys
|
403 |
-
Area of 1st flr:
|
404 |
-
Area of 2nd flr:
|
405 |
-
Area of 3rd flr:
|
406 |
-
Area of 4th flr:
|
407 |
-
TRANSACTION CODE
|
408 |
-
Total Floor Area: 6 SQ.M.
|
409 |
-
1st 2nd 3rd 4th
|
410 |
-
Flr. Flr. Flr. Flr.
|
411 |
-
Plain
|
412 |
-
Cement
|
413 |
-
Wood
|
414 |
-
Walls & 1st 2nd 3rd 4th
|
415 |
-
Partitions Flr. Flr. Flr. Flr.
|
416 |
-
Reinforced
|
417 |
-
Concrete
|
418 |
-
CHB
|
419 |
-
G.I Sheet
|
420 |
-
Survey No. 300
|
421 |
-
Blk No.
|
422 |
-
Build-a-
|
423 |
-
wall
|
424 |
-
Sawali
|
425 |
-
Bamboo
|
426 |
-
Others
|
427 |
-
(Specify)
|
428 |
-
|
429 |
-
Desired Format:
|
430 |
-
ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
|
431 |
-
Owner: <Person's Name>
|
432 |
-
Address: -||-
|
433 |
-
Tel No.: -||-
|
434 |
-
Administrator/Beneficial User: -||-
|
435 |
-
Address: -||-
|
436 |
-
Tel No.: -||-
|
437 |
-
PIN: <Numerical Value Only, Replace Slashes with the number 1>
|
438 |
-
TIN_1: <Numerical Value Only, Replace Slashes with the number 1>
|
439 |
-
TIN_2: <Numerical Value Only, Replace Slashes with the number 1>
|
440 |
-
|
441 |
-
2023-05-11 20:18:35,277 - INFO - ARP No.: 2017-04-0000-00009
|
442 |
-
Owner: Rudy Madrona
|
443 |
-
Address: Alcala Daraga, Albay
|
444 |
-
Tel No.:
|
445 |
-
Administrator/Beneficial User:
|
446 |
-
Address:
|
447 |
-
Tel No.:
|
448 |
-
PIN: 0310 400 301008-1001
|
449 |
-
TIN_1:
|
450 |
-
TIN_2:
|
451 |
-
2023-05-11 20:18:37,028 - INFO - The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels of the desired format, simply extract the text, correct it and return only the desired format. Text: ARP Np. 2011-64-0003-00017
|
452 |
-
OWNER: LLANTOS, JULIAN
|
453 |
-
Address: ALCALA, DARAGA, AMBAY
|
454 |
-
Tel No.:
|
455 |
-
Administrator/Beneficial User:
|
456 |
-
Address:
|
457 |
-
Tel No.:
|
458 |
-
BUILDING LOCATION
|
459 |
-
No. / Street
|
460 |
-
Brgy/District
|
461 |
-
ALCALA
|
462 |
-
DARAGA
|
463 |
-
ALBAY
|
464 |
-
Municipality:
|
465 |
-
Province/City
|
466 |
-
PROPERTY APPRAISAL
|
467 |
-
Kind of Bldg.
|
468 |
-
Structural Type 111-C
|
469 |
-
Bldg. Permit No.
|
470 |
-
Date Issued
|
471 |
-
Condominium Certificate of Title(CCT)
|
472 |
-
Certificate of Completion Issued On:
|
473 |
-
Certificate of Occupancy Issued On:
|
474 |
-
REAL PROPERTY FIELD APPRAISAL & ASSESSMENT SHEET - BUILDING & OTHER
|
475 |
-
STRUCTURES
|
476 |
-
PIN 0310 400301012-100)
|
477 |
-
TIN
|
478 |
-
Date Constructed/Completed:
|
479 |
-
Date Occupied:
|
480 |
-
STRUCTURAL MATERIALS (Checklist)
|
481 |
-
Tiles
|
482 |
-
ROOF
|
483 |
-
Reinforced Concrete
|
484 |
-
G.I. Sheet
|
485 |
-
Aluminum
|
486 |
-
Asbestos
|
487 |
-
Long Span
|
488 |
-
Concrete Desk
|
489 |
-
Nipa/Anahaw/Gogon
|
490 |
-
Others (Specify)
|
491 |
-
Floor Plan:
|
492 |
-
Attach the building plan sketch of floor plan. A photograph may also be attached if necessary.
|
493 |
-
FLOORING
|
494 |
-
Reinforced
|
495 |
-
Concrete
|
496 |
-
(for upper
|
497 |
-
floor)
|
498 |
-
Plain Cement
|
499 |
-
Marble
|
500 |
-
Wood
|
501 |
-
Tiles
|
502 |
-
TIN
|
503 |
-
Others
|
504 |
-
(specify)
|
505 |
-
LAND REFERENCE
|
506 |
-
Owner
|
507 |
-
OCT/TCT/CLOA No.
|
508 |
-
Lot No. 3095-P
|
509 |
-
TD/ARP No:
|
510 |
-
Area
|
511 |
-
Bldg. Age
|
512 |
-
No. of Storeys
|
513 |
-
Area of 1st flr:
|
514 |
-
Area of 2nd flr:
|
515 |
-
Area of 3rd flr:
|
516 |
-
Area of 4th flr:
|
517 |
-
Total Floor Area: 49 SQ.M.
|
518 |
-
1st 2nd 3rd 4th
|
519 |
-
Flr. Flr. Flr. Flr.
|
520 |
-
TRANSACTION CODE
|
521 |
-
Walls &
|
522 |
-
Partitions
|
523 |
-
Reinforced
|
524 |
-
Concrete
|
525 |
-
Plain
|
526 |
-
Cement
|
527 |
-
Wood
|
528 |
-
CHB
|
529 |
-
G.I Sheet
|
530 |
-
Build-a-
|
531 |
-
wall
|
532 |
-
Sawali
|
533 |
-
Bamboo
|
534 |
-
Others
|
535 |
-
(Specify)
|
536 |
-
Survey No.
|
537 |
-
Bik No.
|
538 |
-
1st 2nd 3rd 4th
|
539 |
-
Flr. Fir. Flr. Flr.
|
540 |
-
|
541 |
-
Desired Format:
|
542 |
-
ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
|
543 |
-
Owner: <Person's Name>
|
544 |
-
Address: -||-
|
545 |
-
Tel No.: -||-
|
546 |
-
Administrator/Beneficial User: -||-
|
547 |
-
Address: -||-
|
548 |
-
Tel No.: -||-
|
549 |
-
PIN: <Numerical Value Only, Replace Slashes with the number 1>
|
550 |
-
TIN_1: <Numerical Value Only, Replace Slashes with the number 1>
|
551 |
-
TIN_2: <Numerical Value Only, Replace Slashes with the number 1>
|
552 |
-
|
553 |
-
2023-05-11 20:18:39,870 - INFO - ARP No.: 2011-64-0003-00017
|
554 |
-
Owner: LLANTOS, JULIAN
|
555 |
-
Address: ALCALA, DARAGA, ALBAY
|
556 |
-
Tel No.:
|
557 |
-
Administrator/Beneficial User:
|
558 |
-
Address:
|
559 |
-
Tel No.:
|
560 |
-
PIN: 0310 400301012-100
|
561 |
-
TIN_1:
|
562 |
-
TIN_2:
|
563 |
-
2023-05-11 20:18:41,647 - INFO - The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels of the desired format, simply extract the text, correct it and return only the desired format. Text: ARP No. 2017-04-0003-00036
|
564 |
-
OWNER: LISTANA, ROLANDO
|
565 |
-
Address: ALCAL� DARAGA, ALBAY
|
566 |
-
Tel No.:
|
567 |
-
Administrator/Beneficial User:
|
568 |
-
Address:
|
569 |
-
Tel No.:
|
570 |
-
BUILDING LOCATION
|
571 |
-
No. / Street
|
572 |
-
Brgy/District
|
573 |
-
Municipality:
|
574 |
-
ALCALA
|
575 |
-
Province/City
|
576 |
-
REAL PROPERTY FIELD APPRAISAL & ASSESSMENT SHEET - BUILDING & OTHER
|
577 |
-
STRUCTURES
|
578 |
-
DARALA
|
579 |
-
ALBAY
|
580 |
-
PROPERTY APPRAISAL
|
581 |
-
Kind of Bldg.
|
582 |
-
Structural Type V
|
583 |
-
Bldg. Permit No.
|
584 |
-
Date Issued
|
585 |
-
Condominium Certificate of Title(CCT)
|
586 |
-
Certificate of Completion Issued On:
|
587 |
-
Certificate of Occupancy Issued On:
|
588 |
-
Date Constructed/Completed: 1980
|
589 |
-
Date Occupied:
|
590 |
-
Tiles
|
591 |
-
STRUCTURAL MATERIALS (Checklist)
|
592 |
-
ROOF
|
593 |
-
Reinforced Concrete
|
594 |
-
G.I. Sheet
|
595 |
-
Aluminum
|
596 |
-
Asbestos
|
597 |
-
Long Span
|
598 |
-
Concrete Desk
|
599 |
-
Nipa/Anahaw/Gogon
|
600 |
-
Others (Specify)
|
601 |
-
FLOORING
|
602 |
-
Reinforced
|
603 |
-
Concrete
|
604 |
-
(for upper
|
605 |
-
floor)
|
606 |
-
Plain Cement
|
607 |
-
Marble
|
608 |
-
Wood
|
609 |
-
Tiles
|
610 |
-
Others
|
611 |
-
(specify)
|
612 |
-
PIN 0310400 301 024 -100
|
613 |
-
TIN
|
614 |
-
TIN
|
615 |
-
LAND REFERENCE
|
616 |
-
Owner
|
617 |
-
LISTANA
|
618 |
-
OCT/TCT/CLOA No.
|
619 |
-
Lot No.
|
620 |
-
Floor Plan:
|
621 |
-
Attach the building plan sketch of floor plan. A photograph may also be attached if necessary.
|
622 |
-
TD/ARP No:
|
623 |
-
Area
|
624 |
-
I
|
625 |
-
2798
|
626 |
-
Bldg. Age
|
627 |
-
No. of Storeys
|
628 |
-
Area of 1st flr:
|
629 |
-
Area of 2nd flr:
|
630 |
-
Area of 3rd flr:
|
631 |
-
Area of 4th flr:
|
632 |
-
TRANSACTION CODE
|
633 |
-
Total Floor Area: 125Q-m�
|
634 |
-
1st 2nd 3rd 4th
|
635 |
-
Flr. Flr. Flr. Flr.
|
636 |
-
MARIAND
|
637 |
-
*
|
638 |
-
Walls & 1st 2nd 3rd 4th
|
639 |
-
Partitions Flr. Flr. Flr. Flr.
|
640 |
-
Reinforced
|
641 |
-
Concrete
|
642 |
-
Plain
|
643 |
-
Cement
|
644 |
-
Wood
|
645 |
-
CHB
|
646 |
-
G.1 Sheet
|
647 |
-
Survey No. 4684
|
648 |
-
Blk No.
|
649 |
-
Build-a-
|
650 |
-
wall
|
651 |
-
Sawali
|
652 |
-
Bamboo
|
653 |
-
Others
|
654 |
-
(Specify)
|
655 |
-
|
656 |
-
Desired Format:
|
657 |
-
ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
|
658 |
-
Owner: <Person's Name>
|
659 |
-
Address: -||-
|
660 |
-
Tel No.: -||-
|
661 |
-
Administrator/Beneficial User: -||-
|
662 |
-
Address: -||-
|
663 |
-
Tel No.: -||-
|
664 |
-
PIN: <Numerical Value Only, Replace Slashes with the number 1>
|
665 |
-
TIN_1: <Numerical Value Only, Replace Slashes with the number 1>
|
666 |
-
TIN_2: <Numerical Value Only, Replace Slashes with the number 1>
|
667 |
-
|
668 |
-
2023-05-11 20:18:45,356 - INFO - ARP No.: 2017-04-0003-000361
|
669 |
-
Owner: ROLANDO LISTANA
|
670 |
-
Address: ALCALA DARAGA, ALBAY
|
671 |
-
Tel No.: -||-
|
672 |
-
Administrator/Beneficial User: -||-
|
673 |
-
Address: -||-
|
674 |
-
Tel No.: -||-
|
675 |
-
PIN: 0310400301024100
|
676 |
-
TIN_1: -||-
|
677 |
-
TIN_2: -||-
|
678 |
-
2023-05-11 20:18:45,357 - INFO - [{'File Name': 'DARAGA-ALCALA-0017', 'General Information': {'ARP No.': '2017-04-0000-00009', 'Owner': 'Rudy Madrona', 'Address': '', 'Tel No.': '', 'Administrator/Beneficial User': '', 'Address:': '', 'Tel No.:': '', 'PIN': '0310 400 301008-1001', 'TIN_1': '', 'TIN_2': ''}, 'Building Location': {'No. / Street': '', 'Brgy/District': '', 'Municipality': '', 'Province/city': ''}, 'Land Reference': {'Owner': '', 'OCT/TCT/CLOA NO.': '', 'Lot No.': '', 'Survey No.': '', 'Blk No.': '', 'TD/ARP No.:': '', 'Area': ''}, 'Property Appraisal': {'Kind of Bldg': '', 'Structural Type': '', 'Bldg. Permit No.': '', 'Date Issued': '', 'Condominium Certificate of Title(CCT)': '', 'Certificate of Completion Issued on': '', 'Certificate of Occupancy Issued on': '', 'Date Constructed/Completed': '', 'Date Occupied': '', 'Bldg. Age': '', 'No. of Storeys': '', 'Area of 1st Flr': '', 'Area of 2nd Flr': '', 'Area of 3rd Flr': '', 'Area of 4th Flr': '', 'Total Floor Area': ''}}, {'File Name': 'DARAGA-ALCALA-0033', 'General Information': {'ARP No.': '2011-64-0003-00017', 'Owner': 'LLANTOS, JULIAN', 'Address': '', 'Tel No.': '', 'Administrator/Beneficial User': '', 'Address:': '', 'Tel No.:': '', 'PIN': '0310 400301012-100', 'TIN_1': '', 'TIN_2': ''}, 'Building Location': {'No. / Street': '', 'Brgy/District': '', 'Municipality': '', 'Province/city': ''}, 'Land Reference': {'Owner': '', 'OCT/TCT/CLOA NO.': '', 'Lot No.': '', 'Survey No.': '', 'Blk No.': '', 'TD/ARP No.:': '', 'Area': ''}, 'Property Appraisal': {'Kind of Bldg': '', 'Structural Type': '', 'Bldg. Permit No.': '', 'Date Issued': '', 'Condominium Certificate of Title(CCT)': '', 'Certificate of Completion Issued on': '', 'Certificate of Occupancy Issued on': '', 'Date Constructed/Completed': '', 'Date Occupied': '', 'Bldg. Age': '', 'No. 
of Storeys': '', 'Area of 1st Flr': '', 'Area of 2nd Flr': '', 'Area of 3rd Flr': '', 'Area of 4th Flr': '', 'Total Floor Area': ''}}, {'File Name': 'DARAGA-ALCALA-0071', 'General Information': {'ARP No.': '2017-04-0003-000361', 'Owner': 'ROLANDO LISTANA', 'Address': '', 'Tel No.': '', 'Administrator/Beneficial User': '', 'Address:': '', 'Tel No.:': '', 'PIN': '0310400301024100', 'TIN_1': '', 'TIN_2': ''}, 'Building Location': {'No. / Street': '', 'Brgy/District': '', 'Municipality': '', 'Province/city': ''}, 'Land Reference': {'Owner': '', 'OCT/TCT/CLOA NO.': '', 'Lot No.': '', 'Survey No.': '', 'Blk No.': '', 'TD/ARP No.:': '', 'Area': ''}, 'Property Appraisal': {'Kind of Bldg': '', 'Structural Type': '', 'Bldg. Permit No.': '', 'Date Issued': '', 'Condominium Certificate of Title(CCT)': '', 'Certificate of Completion Issued on': '', 'Certificate of Occupancy Issued on': '', 'Date Constructed/Completed': '', 'Date Occupied': '', 'Bldg. Age': '', 'No. of Storeys': '', 'Area of 1st Flr': '', 'Area of 2nd Flr': '', 'Area of 3rd Flr': '', 'Area of 4th Flr': '', 'Total Floor Area': ''}}]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
@@ -1,188 +1,38 @@
|
|
1 |
-
import os
|
2 |
import openai
|
3 |
import gradio as gr
|
4 |
-
import requests
|
5 |
-
import datetime
|
6 |
-
from io import BytesIO
|
7 |
-
from google.api_core.client_options import ClientOptions
|
8 |
-
from google.cloud import documentai_v1 as documentai
|
9 |
import json
|
10 |
-
from google.cloud import vision
|
11 |
import time
|
12 |
-
from settings import char_remove, gpt_model, RPFAAP2, RPFAAP1, project_id, project_location, processor_id
|
13 |
from tqdm import tqdm
|
14 |
import logging
|
|
|
15 |
import google
|
|
|
|
|
|
|
16 |
|
17 |
logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
18 |
|
19 |
-
def
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
sequence_1 = (prefix, context)
|
28 |
-
additional = (" ".join(sequence_1))
|
29 |
-
sequence = (additional, content)
|
30 |
-
|
31 |
-
final_content = (" ".join(sequence))
|
32 |
-
logging.info(final_content)
|
33 |
-
completion = openai.ChatCompletion.create(
|
34 |
-
model=gpt_model,
|
35 |
-
user="1",
|
36 |
-
temperature=0.1,
|
37 |
-
messages=[
|
38 |
-
{"role": "system", "content": prompt},
|
39 |
-
{"role": "user", "content": final_content}
|
40 |
-
]
|
41 |
-
)
|
42 |
-
logging.info(completion.choices[0].message.content)
|
43 |
-
return(completion.choices[0].message.content)
|
44 |
-
|
45 |
-
def remove_na(string):
|
46 |
-
for char in char_remove:
|
47 |
-
string = string.replace(char, "")
|
48 |
-
return string
|
49 |
-
|
50 |
-
def chat_gpt_document(content, document_type, context):
|
51 |
-
openai.api_key = os.environ['GPT_API_KEY']
|
52 |
-
prompt = "You are an expert at identifying OCR errors and correcting them with the help of context, intuition and logic."
|
53 |
-
document_prefix = "The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels of the desired format, simply extract the text, correct it and return only the desired format. Text:"
|
54 |
-
additional_prefix = "Additionally the text"
|
55 |
-
|
56 |
-
content_info = content[0]
|
57 |
-
content_name = content[1]
|
58 |
-
if document_type == "RPFAA Building P1":
|
59 |
-
document = "RPFAAP1.json"
|
60 |
-
desired_format = RPFAAP1
|
61 |
-
elif document_type == "RPFAA Building P2":
|
62 |
-
document = "RPFAAP2.json"
|
63 |
-
desired_format = RPFAAP2
|
64 |
-
else:
|
65 |
-
property_info = ["Please Select a Document Type"]
|
66 |
-
return json.dumps(property_info, indent=4)
|
67 |
-
|
68 |
-
if context == "":
|
69 |
-
sequence_1 = (document_prefix, content_info, desired_format)
|
70 |
-
else:
|
71 |
-
sequence_1 = (document_prefix, content_info, desired_format, additional_prefix, context)
|
72 |
-
|
73 |
-
content_1 = (" ".join(sequence_1))
|
74 |
-
logging.info(content_1)
|
75 |
-
|
76 |
-
completion_1 = openai.ChatCompletion.create(
|
77 |
-
model=gpt_model,
|
78 |
-
user="1",
|
79 |
-
temperature=0.1,
|
80 |
-
messages=[
|
81 |
-
{"role": "system", "content": prompt},
|
82 |
-
{"role": "user", "content": content_1}
|
83 |
-
]
|
84 |
-
)
|
85 |
-
logging.info(completion_1.choices[0].message.content)
|
86 |
-
input_string = remove_na(completion_1.choices[0].message.content)
|
87 |
-
|
88 |
-
with open(document) as f:
|
89 |
-
property_info = json.load(f)
|
90 |
-
#Adds the name of the file
|
91 |
-
property_info["File Name"] = content_name
|
92 |
-
#Fills in the information
|
93 |
-
for line in input_string.split('\n'):
|
94 |
-
if ':' in line:
|
95 |
-
key, value = line.split(':', 1)
|
96 |
-
key = key.strip()
|
97 |
-
for category in property_info:
|
98 |
-
if key in property_info[category]:
|
99 |
-
property_info[category][key] = value.strip()
|
100 |
-
break
|
101 |
-
else:
|
102 |
-
if key == "File Name":
|
103 |
-
property_info[key] = value.strip()
|
104 |
-
return json.dumps(property_info, indent=4)
|
105 |
-
|
106 |
-
def detect_image(content, lang):
|
107 |
-
credentials = json.loads(os.environ['CREDENTIALS'])
|
108 |
-
temp_file_path = 'temp_credentials.json'
|
109 |
-
with open(temp_file_path, 'w') as file:
|
110 |
-
json.dump(credentials, file)
|
111 |
-
os.environ['GOOGLE_APPLICATION_CREDENTIALS']=r'temp_credentials.json'
|
112 |
-
client = vision.ImageAnnotatorClient()
|
113 |
-
buffer = BytesIO()
|
114 |
-
content.save(buffer, format="PNG")
|
115 |
-
content = buffer.getvalue()
|
116 |
-
if lang == "Filpino":
|
117 |
-
hints = "tl"
|
118 |
else:
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
response = client.document_text_detection(image=image, image_context={"language_hints": [hints]})
|
123 |
-
|
124 |
-
if response.error.message:
|
125 |
-
raise Exception(
|
126 |
-
'{}\nFor more info on error messages, check: '
|
127 |
-
'https://cloud.google.com/apis/design/errors'.format(
|
128 |
-
response.error.message))
|
129 |
-
|
130 |
-
os.remove(temp_file_path)
|
131 |
-
logging.info(response)
|
132 |
-
return(response.full_text_annotation.text)
|
133 |
-
|
134 |
-
def detect_document(content):
|
135 |
-
credentials = json.loads(os.environ['CREDENTIALS'])
|
136 |
-
temp_file_path = 'temp_credentials.json'
|
137 |
-
with open(temp_file_path, 'w') as file:
|
138 |
-
json.dump(credentials, file)
|
139 |
-
os.environ['GOOGLE_APPLICATION_CREDENTIALS']=r'temp_credentials.json'
|
140 |
-
PROJECT_ID = project_id
|
141 |
-
LOCATION = project_location # Format is 'us' or 'eu'
|
142 |
-
PROCESSOR_ID = processor_id # Create processor in Cloud Console
|
143 |
-
content_extension = content.name.split(".")[-1]
|
144 |
-
|
145 |
-
if content_extension.upper() == "TIFF":
|
146 |
-
MIME_TYPE = "image/tiff"
|
147 |
-
elif content_extension.upper() =="PDF":
|
148 |
-
MIME_TYPE = "application/pdf"
|
149 |
-
elif content_extension.upper() =="PNG":
|
150 |
-
MIME_TYPE = "image/png"
|
151 |
-
elif content_extension.upper() =="JPG":
|
152 |
-
MIME_TYPE = "image/jpg"
|
153 |
-
else:
|
154 |
-
return("Please upload a valid MIME type")
|
155 |
-
|
156 |
-
docai_client = documentai.DocumentProcessorServiceClient(
|
157 |
-
client_options=ClientOptions(api_endpoint=f"{LOCATION}-documentai.googleapis.com")
|
158 |
-
)
|
159 |
-
|
160 |
-
RESOURCE_NAME = docai_client.processor_path(PROJECT_ID, LOCATION, PROCESSOR_ID)
|
161 |
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
document_object = result.document
|
172 |
-
|
173 |
-
name = content.name.split('\\')[-1]
|
174 |
-
name = name.split("/")[-1]
|
175 |
-
name = name.split('.')[0]
|
176 |
-
|
177 |
-
os.remove(temp_file_path)
|
178 |
-
|
179 |
-
return(document_object.text, name)
|
180 |
-
|
181 |
-
def image(content, lang, context):
|
182 |
-
return chat_gpt_image(detect_image(content, lang), context)
|
183 |
-
|
184 |
-
def document(content, document_type, context):
|
185 |
-
return chat_gpt_document(detect_document(content),document_type,context)
|
186 |
|
187 |
unprocessed_documents = []
|
188 |
global_document_type = None
|
@@ -218,8 +68,8 @@ def batch_document(content, document_type, context, progress = gr.Progress()):
|
|
218 |
else:
|
219 |
progress(0, desc="Starting")
|
220 |
for x in progress.tqdm(content, desc="Processing"):
|
221 |
-
retries =
|
222 |
-
timeout =
|
223 |
i = 0
|
224 |
while True:
|
225 |
try:
|
@@ -241,39 +91,10 @@ def batch_document(content, document_type, context, progress = gr.Progress()):
|
|
241 |
if document_type == "":
|
242 |
document_type = "error"
|
243 |
return save_json(combined_data, document_type)
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
global global_document_type
|
249 |
-
global global_context
|
250 |
-
global unprocessed_documents
|
251 |
-
if unprocessed_documents:
|
252 |
-
output = batch_document(unprocessed_documents, global_document_type, global_context, "None")
|
253 |
-
unprocessed_documents = []
|
254 |
-
return output
|
255 |
-
else:
|
256 |
-
unprocessed_documents = []
|
257 |
-
return save_json("No Unprocessed Documents", "No Unprocessed Documents")
|
258 |
-
|
259 |
-
def save_json(text, filename):
|
260 |
-
filename = filename+".json"
|
261 |
-
with open(filename, "w", encoding='utf-8') as outfile:
|
262 |
-
json.dump(text, outfile, ensure_ascii=False)
|
263 |
-
return filename
|
264 |
-
|
265 |
-
def combine_json_files(json_files, progress=gr.Progress()):
|
266 |
-
combined_data = []
|
267 |
-
progress(0, desc="Starting")
|
268 |
-
for file in progress.tqdm(json_files, desc="Combining JSON Files"):
|
269 |
-
with open(file.name, 'r') as json_file:
|
270 |
-
data = json.load(json_file)
|
271 |
-
combined_data.extend(data)
|
272 |
-
# Convert the combined_data dict back to a JSON string
|
273 |
-
# You might want to save this to a file and return the file,
|
274 |
-
# or return the JSON string directly
|
275 |
-
logging.info("Combined JSON File: ", combined_data)
|
276 |
-
return save_json(combined_data, "Combined Json")
|
277 |
|
278 |
with gr.Blocks(title="Axon OCR", css=".markdown {text-align: center;}") as app:
|
279 |
gr.Markdown("""# Axon OCR
|
@@ -282,7 +103,6 @@ with gr.Blocks(title="Axon OCR", css=".markdown {text-align: center;}") as app:
|
|
282 |
with gr.Row():
|
283 |
with gr.Column():
|
284 |
image_input = [gr.Image(type="pil"),
|
285 |
-
gr.Radio(["English", "Filipino"], label="Language", info="What is the document language? (Optional)"),
|
286 |
gr.Textbox(label="What kind of Image is this? (Optional)", placeholder="This is an image of an Official Reciept")]
|
287 |
image_output = gr.Textbox(label="Result")
|
288 |
image_button = gr.Button("Scan")
|
@@ -290,7 +110,7 @@ with gr.Blocks(title="Axon OCR", css=".markdown {text-align: center;}") as app:
|
|
290 |
with gr.Row():
|
291 |
with gr.Column():
|
292 |
document_input = [gr.File(file_types=["pdf","tiff","image","text"]),
|
293 |
-
gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2"], label="File Type", info="What type of document is this?"),
|
294 |
gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
|
295 |
document_output = gr.Textbox(label="Result")
|
296 |
document_button = gr.Button("Scan")
|
@@ -298,7 +118,7 @@ with gr.Blocks(title="Axon OCR", css=".markdown {text-align: center;}") as app:
|
|
298 |
with gr.Row():
|
299 |
with gr.Column():
|
300 |
batch_document_input = [gr.File(file_types=["pdf","tiff","image","text"], file_count="multiple"),
|
301 |
-
gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2"], label="File Type", info="What type of document is this?"),
|
302 |
gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
|
303 |
batch_document_output = gr.File(label="Result")
|
304 |
batch_document_button = gr.Button("Scan")
|
@@ -306,7 +126,7 @@ with gr.Blocks(title="Axon OCR", css=".markdown {text-align: center;}") as app:
|
|
306 |
with gr.Column():
|
307 |
retry_button = gr.Button("Retry Unprocessed Documents", label="Retry")
|
308 |
with gr.Column():
|
309 |
-
stop_button = gr.Button("Stop Processing
|
310 |
with gr.Tab("Combine JSON"):
|
311 |
with gr.Row():
|
312 |
with gr.Column():
|
@@ -322,4 +142,4 @@ with gr.Blocks(title="Axon OCR", css=".markdown {text-align: center;}") as app:
|
|
322 |
combine_button.click(combine_json_files, inputs=json_files_input, outputs=combined_json_output)
|
323 |
|
324 |
app.queue()
|
325 |
-
app.launch(auth=("username", "password"))
|
|
|
|
|
1 |
import openai
|
2 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
3 |
import json
|
|
|
4 |
import time
|
|
|
5 |
from tqdm import tqdm
|
6 |
import logging
|
7 |
+
import requests
|
8 |
import google
|
9 |
+
from ocr_functions import detect_document, detect_image
|
10 |
+
from ai_functions import chat_gpt_document, chat_gpt_image
|
11 |
+
from helpers import save_json
|
12 |
|
13 |
logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
14 |
|
15 |
+
def retry_unprocessed_documents(progress=gr.Progress()):
    """Re-run batch processing for the documents queued for retry.

    Reads the module-level ``unprocessed_documents``, ``global_document_type``
    and ``global_context`` values, feeds them to ``batch_document`` and then
    clears the retry queue.

    Args:
        progress: Gradio progress tracker forwarded to ``batch_document``.
            ``batch_document`` calls ``progress(0, ...)`` and
            ``progress.tqdm(...)``, so it must be a real ``gr.Progress``
            object (a placeholder string would raise ``TypeError`` there).

    Returns:
        Whatever ``batch_document`` returns when documents were queued;
        otherwise the path returned by ``save_json`` for a placeholder file
        containing the text "No Unprocessed Documents".
    """
    global global_document_type
    global global_context
    global unprocessed_documents

    if unprocessed_documents:
        output = batch_document(
            unprocessed_documents,
            global_document_type,
            global_context,
            progress,
        )
        # The queue is cleared only after the batch call returns, so an
        # exception in batch_document leaves it intact for another retry.
        unprocessed_documents = []
        return output

    unprocessed_documents = []
    return save_json("No Unprocessed Documents", "No Unprocessed Documents")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
+
def combine_json_files(json_files, progress=gr.Progress()):
    """Merge several uploaded JSON files into one "Combined Json.json" file.

    Args:
        json_files: Iterable of uploaded file objects; only their ``.name``
            attribute (a filesystem path) is used.
        progress: Gradio progress tracker used to report per-file progress.

    Returns:
        The path returned by ``save_json`` for the combined file.

    Raises:
        OSError: If one of the files cannot be opened.
        json.JSONDecodeError: If one of the files is not valid JSON.
    """
    combined_data = []
    progress(0, desc="Starting")
    for file in progress.tqdm(json_files, desc="Combining JSON Files"):
        # save_json writes UTF-8 with ensure_ascii=False, so read the files
        # back as UTF-8 instead of relying on the platform default encoding.
        with open(file.name, 'r', encoding='utf-8') as json_file:
            data = json.load(json_file)
        if isinstance(data, list):
            combined_data.extend(data)
        else:
            # A single top-level object (or scalar) is kept whole; extend()
            # would scatter its keys/characters into the result.
            combined_data.append(data)
    # Use a %s placeholder: passing an argument without one makes the logging
    # module fail to format the record.
    logging.info("Combined JSON File: %s", combined_data)
    return save_json(combined_data, "Combined Json")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
unprocessed_documents = []
|
38 |
global_document_type = None
|
|
|
68 |
else:
|
69 |
progress(0, desc="Starting")
|
70 |
for x in progress.tqdm(content, desc="Processing"):
|
71 |
+
retries = 1
|
72 |
+
timeout = 1
|
73 |
i = 0
|
74 |
while True:
|
75 |
try:
|
|
|
91 |
if document_type == "":
|
92 |
document_type = "error"
|
93 |
return save_json(combined_data, document_type)
|
94 |
+
def image(content, context):
    """Run OCR on an image and post-process the text with the chat model.

    Args:
        content: Image object accepted by ``detect_image``.
        context: Optional free-text description forwarded to
            ``chat_gpt_image``.

    Returns:
        The value returned by ``chat_gpt_image``.
    """
    return chat_gpt_image(detect_image(content), context)
|
96 |
+
def document(content, document_type, context):
    """Run OCR on an uploaded document and extract its fields.

    Args:
        content: Uploaded file object accepted by ``detect_document``.
        document_type: Document template name forwarded to
            ``chat_gpt_document``.
        context: Optional free-text hint forwarded to ``chat_gpt_document``.

    Returns:
        The value returned by ``chat_gpt_document``.
    """
    return chat_gpt_document(detect_document(content), document_type, context)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
|
99 |
with gr.Blocks(title="Axon OCR", css=".markdown {text-align: center;}") as app:
|
100 |
gr.Markdown("""# Axon OCR
|
|
|
103 |
with gr.Row():
|
104 |
with gr.Column():
|
105 |
image_input = [gr.Image(type="pil"),
|
|
|
106 |
gr.Textbox(label="What kind of Image is this? (Optional)", placeholder="This is an image of an Official Reciept")]
|
107 |
image_output = gr.Textbox(label="Result")
|
108 |
image_button = gr.Button("Scan")
|
|
|
110 |
with gr.Row():
|
111 |
with gr.Column():
|
112 |
document_input = [gr.File(file_types=["pdf","tiff","image","text"]),
|
113 |
+
gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2", "TDRP"], label="File Type", info="What type of document is this?"),
|
114 |
gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
|
115 |
document_output = gr.Textbox(label="Result")
|
116 |
document_button = gr.Button("Scan")
|
|
|
118 |
with gr.Row():
|
119 |
with gr.Column():
|
120 |
batch_document_input = [gr.File(file_types=["pdf","tiff","image","text"], file_count="multiple"),
|
121 |
+
gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2", "TDRP"], label="File Type", info="What type of document is this?"),
|
122 |
gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
|
123 |
batch_document_output = gr.File(label="Result")
|
124 |
batch_document_button = gr.Button("Scan")
|
|
|
126 |
with gr.Column():
|
127 |
retry_button = gr.Button("Retry Unprocessed Documents", label="Retry")
|
128 |
with gr.Column():
|
129 |
+
stop_button = gr.Button("Stop Processing Document", label="Stop")
|
130 |
with gr.Tab("Combine JSON"):
|
131 |
with gr.Row():
|
132 |
with gr.Column():
|
|
|
142 |
combine_button.click(combine_json_files, inputs=json_files_input, outputs=combined_json_output)
|
143 |
|
144 |
app.queue()
|
145 |
+
app.launch(share=True, auth=("username", "password"))
|
gr.py
DELETED
@@ -1,11 +0,0 @@
|
|
1 |
-
import json
|
2 |
-
import gradio as gr
|
3 |
-
|
4 |
-
def save_json(text):
|
5 |
-
with open("output.json", "w") as outfile:
|
6 |
-
json.dump(text, outfile)
|
7 |
-
|
8 |
-
return "output.json"
|
9 |
-
|
10 |
-
demo = gr.Interface(save_text_as_json, "text", "file")
|
11 |
-
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
helpers.py
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from settings import char_remove
|
2 |
+
import re
|
3 |
+
import json
|
4 |
+
import logging
|
5 |
+
logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
6 |
+
|
7 |
+
def remove_na(string):
    """Return *string* with every entry of ``char_remove`` deleted.

    ``char_remove`` comes from ``settings``; each entry is removed with
    ``str.replace`` in the order the entries are listed.
    """
    for char in char_remove:
        string = string.replace(char, "")
    return string
|
11 |
+
|
12 |
+
def save_json(text, filename):
    """Write *text* as JSON to ``<filename>.json`` and return that path.

    Args:
        text: Any JSON-serialisable object.
        filename: Output path without the ``.json`` suffix; the suffix is
            always appended.

    Returns:
        The path of the file that was written (``filename + ".json"``).
    """
    filename = filename + ".json"
    # ensure_ascii=False keeps non-ASCII characters readable in the output,
    # which is why the file is opened explicitly as UTF-8.
    with open(filename, "w", encoding='utf-8') as outfile:
        json.dump(text, outfile, ensure_ascii=False)
    return filename
|
17 |
+
|
18 |
+
def format_polygon(polygon):
    """Render a polygon as ``"[x, y], [x, y], ..."``.

    Args:
        polygon: Sequence of points exposing ``.x`` and ``.y`` attributes,
            or a falsy value (``None`` / empty sequence).

    Returns:
        The formatted coordinate list, or ``"N/A"`` when *polygon* is falsy.
    """
    if not polygon:
        return "N/A"
    return ", ".join(f"[{p.x}, {p.y}]" for p in polygon)
|
22 |
+
|
23 |
+
def filter_tables(input_string, table_numbers):
    """Extract selected tables from an OCR table dump as a JSON string.

    *input_string* is expected to contain sections introduced by
    ``Table # <n>`` followed by lines of the form
    ``Cell[<row>][<col>] has content '<text>'``.

    Args:
        input_string: The textual table dump.
        table_numbers: Zero-based positions of the tables to keep, counted
            in order of appearance in *input_string*.

    Returns:
        A JSON object string mapping ``"table_1"``, ``"table_2"``, ... (in
        the order given by *table_numbers*) to a row-major list of lists of
        cell strings. Cells that were not listed are empty strings; a table
        without any cells becomes an empty list.

    Raises:
        IndexError: If a requested table position does not exist.
    """
    # Everything before the first "Table # n" header is discarded.
    tables = re.split(r"Table # \d+", input_string)[1:]

    json_tables = {}
    for table_counter, table_number in enumerate(table_numbers, start=1):
        table_str = tables[table_number]

        # NOTE: the non-greedy '(.*?)' stops at the first apostrophe, so cell
        # text containing "'" is captured only up to that character.
        cells = [
            (int(row), int(col), content)
            for row, col, content in re.findall(
                r"Cell\[(\d+)\]\[(\d+)\] has content '(.*?)'", table_str
            )
        ]

        if cells:
            num_rows = max(row for row, _, _ in cells) + 1
            num_cols = max(col for _, col, _ in cells) + 1
            table = [["" for _ in range(num_cols)] for _ in range(num_rows)]
            for row, col, content in cells:
                table[row][col] = content
        else:
            # max() of an empty sequence would raise ValueError.
            table = []

        json_tables[f"table_{table_counter}"] = table

    return json.dumps(json_tables)
|
59 |
+
|
60 |
+
def extract_text_within_range(input_string, x_range, y_range):
    """Return the OCR line texts that touch a rectangular region.

    *input_string* is scanned for entries of the form
    ``Line # <n> text '<text>' within bounding polygon '[x, y], [x, y],
    [x, y], [x, y]'``. A line is selected when at least one of its four
    polygon points lies inside the region (bounds inclusive).

    Args:
        input_string: The textual line dump to search.
        x_range: ``(low, high)`` bounds for the x coordinate.
        y_range: ``(low, high)`` bounds for the y coordinate.

    Returns:
        List of matching line texts in order of appearance. Lines whose text
        contains an apostrophe do not match the pattern and are skipped.
    """
    # Function-scope import so this block does not depend on the module's
    # import list.
    import ast

    pattern = r"Line # \d+ text '([^']*)' within bounding polygon '(\[[\d.]+, [\d.]+\], \[[\d.]+, [\d.]+\], \[[\d.]+, [\d.]+\], \[[\d.]+, [\d.]+\])'"

    output = []
    for text, polygon_str in re.findall(pattern, input_string):
        # The regex only admits digits and dots, so literal_eval yields the
        # same tuple of [x, y] lists that eval() did, without executing code
        # taken from OCR output.
        polygon = ast.literal_eval(polygon_str)
        if any(
            x_range[0] <= x <= x_range[1] and y_range[0] <= y <= y_range[1]
            for x, y in polygon
        ):
            output.append(text)

    return output
|
74 |
+
|
75 |
+
def _split_pair(line):
    """Split a ``"key: value"`` line into ``(key, value)``, or return None."""
    key, sep, value = line.partition(": ")
    if sep:
        return key, value
    stripped = line.rstrip()
    if stripped.endswith(":"):
        # "Tel No.:" style line: a label with an empty value.
        return stripped[:-1], ""
    return None


def _pairs_to_dict(lines):
    """Build a ``{key: value}`` dict from lines, skipping non-pair lines."""
    pairs = {}
    for line in lines:
        if not line.strip():
            continue
        pair = _split_pair(line)
        if pair is None:
            logging.warning("merge_strings: skipping line without a key: %r", line)
            continue
        pairs[pair[0]] = pair[1]
    return pairs


def _parse_box(value):
    """Parse ``"((x0, y0), (x1, y1))"`` into ``(x_range, y_range)``, or None."""
    # Function-scope import so this block does not depend on the module's
    # import list.
    import ast

    if not value.startswith("("):
        return None
    try:
        # literal_eval instead of eval: values can originate from OCR/model
        # output (e.g. "(052) 123-4567") and must never be executed.
        coords = ast.literal_eval(value)
        x_range = (coords[0][0], coords[1][0])
        y_range = (coords[0][1], coords[1][1])
    except (ValueError, SyntaxError, TypeError, LookupError):
        return None
    numbers = (*x_range, *y_range)
    if not all(isinstance(n, (int, float)) and not isinstance(n, bool) for n in numbers):
        return None
    return x_range, y_range


def merge_strings(input_string, input_coords, extract_coords):
    """Overlay coordinate-based lookups onto a ``key: value`` text block.

    Both *input_string* and *input_coords* are newline-separated
    ``key: value`` lists. For every key of *input_string* that also occurs in
    *input_coords*, the value is replaced by the one from *input_coords*.
    Afterwards every value that parses as a coordinate pair
    ``((x0, y0), (x1, y1))`` is replaced by the OCR line texts found in that
    region of *extract_coords* (joined with ``", "``), or by ``'-||-'`` when
    nothing is found there.

    Blank lines and lines without a key/value separator are ignored (the
    latter are logged). A value that starts with ``(`` but is not a valid
    coordinate pair is left unchanged. When a key occurs more than once, the
    last value wins and the key keeps its first position.

    Args:
        input_string: ``key: value`` lines to update.
        input_coords: ``key: value`` lines whose values override matching
            keys of *input_string*.
        extract_coords: Line dump passed to ``extract_text_within_range``.

    Returns:
        The updated ``key: value`` lines joined with newlines.
    """
    lines1 = input_string.split('\n')
    # Filter out empty lines and strip leading/trailing whitespace.
    lines2 = [line.strip() for line in input_coords.split('\n') if line.strip()]

    logging.info(lines2)

    dict1 = _pairs_to_dict(lines1)
    dict2 = _pairs_to_dict(lines2)

    # Values from input_coords take precedence for keys present in both.
    for key in dict1:
        if key in dict2:
            dict1[key] = dict2[key]

    # Only existing keys are reassigned, so iterating items() here is safe.
    for key, value in dict1.items():
        box = _parse_box(value)
        if box is None:
            continue
        x_range, y_range = box
        text = extract_text_within_range(extract_coords, x_range, y_range)
        dict1[key] = ', '.join(text) if text else '-||-'

    return '\n'.join(f"{key}: {value}" for key, value in dict1.items())
|
ocr_functions.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from azure.core.credentials import AzureKeyCredential
|
2 |
+
from azure.ai.formrecognizer import DocumentAnalysisClient
|
3 |
+
from io import BytesIO
|
4 |
+
from helpers import format_polygon
|
5 |
+
import logging
|
6 |
+
import os
|
7 |
+
|
8 |
+
logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
9 |
+
endpoint = os.environ['AZURE_API_ENDPOINT']
|
10 |
+
key = os.environ['AZURE_API_KEY']
|
11 |
+
|
12 |
+
def detect_document(content):
    """Analyse an uploaded document with Azure's ``prebuilt-document`` model.

    Args:
        content: Uploaded file object; its ``.name`` attribute is used as the
            path of the file to send.

    Returns:
        A 4-tuple ``(pair_content, document_content, table_content, name)``:
        textual dumps of the detected key/value pairs, text lines (with their
        bounding polygons) and table cells, followed by the file name without
        directory and extension.
    """
    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )

    with open(content.name, "rb") as f:
        poller = document_analysis_client.begin_analyze_document(
            "prebuilt-document", document=f
        )
        result = poller.result()

    pair_parts = ["----Key-value pairs found in document----\n"]
    for kv_pair in result.key_value_pairs:
        # Pairs missing either side are omitted from the dump.
        if kv_pair.key and kv_pair.value:
            pair_parts.append(
                "Key '{}' with Value '{}' \n".format(
                    kv_pair.key.content,
                    kv_pair.value.content,
                )
            )
    pair_content = "".join(pair_parts)
    logging.info(pair_content)

    line_parts = ["----Lines found in document----\n"]
    for page in result.pages:
        # line_idx restarts at 0 on every page.
        for line_idx, line in enumerate(page.lines):
            line_parts.append(
                "...Line # {} text '{}' within bounding polygon '{}' \n".format(
                    line_idx,
                    line.content,
                    format_polygon(line.polygon),
                )
            )
    document_content = "".join(line_parts)
    logging.info(document_content)

    table_parts = ["----Tables found in document----\n"]
    for table_idx, table in enumerate(result.tables):
        table_parts.append(
            "Table # {} has {} rows and {} columns\n".format(
                table_idx, table.row_count, table.column_count
            )
        )
        for cell in table.cells:
            table_parts.append(
                "...Cell[{}][{}] has content '{}'\n".format(
                    cell.row_index,
                    cell.column_index,
                    cell.content,
                )
            )
    table_content = "".join(table_parts)
    logging.info(table_content)

    # Strip the directory for both Windows ("\\") and POSIX ("/") paths;
    # splitting on "\\" alone leaves the whole path in place on POSIX.
    name = content.name.replace('\\', '/').rsplit('/', 1)[-1]
    name = name.split('.')[0]
    return (pair_content, document_content, table_content, name)
|
55 |
+
|
56 |
+
def detect_image(content):
    """Read the text of an image with Azure's ``prebuilt-read`` model.

    Args:
        content: Image object exposing ``save(stream, format=...)``; it is
            serialised to PNG in memory before being sent.

    Returns:
        The full recognised text (``result.content``).
    """
    document_analysis_client = DocumentAnalysisClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )

    byte_stream = BytesIO()
    content.save(byte_stream, format='PNG')
    # Rewind so the client reads the stream from its first byte.
    byte_stream.seek(0)

    poller = document_analysis_client.begin_analyze_document(
        "prebuilt-read", document=byte_stream
    )
    result = poller.result()

    logging.info(result.content)
    return result.content
|
output.json
DELETED
@@ -1,146 +0,0 @@
|
|
1 |
-
[
|
2 |
-
{
|
3 |
-
"File Name": "DARAGA-ALCALA-0013",
|
4 |
-
"General Information": {
|
5 |
-
"ARP No.": "1017-64-0003-00007",
|
6 |
-
"Owner": "SAME",
|
7 |
-
"Address": "",
|
8 |
-
"Tel No.": "",
|
9 |
-
"Administrator/Beneficial User": "",
|
10 |
-
"Address:": "",
|
11 |
-
"Tel No.:": "",
|
12 |
-
"PIN": "03/0400301007-1001",
|
13 |
-
"TIN_1": "",
|
14 |
-
"TIN_2": ""
|
15 |
-
},
|
16 |
-
"Building Location": {
|
17 |
-
"No. / Street": "",
|
18 |
-
"Brgy/District": "ALCALA",
|
19 |
-
"Municipality": "DARAGA",
|
20 |
-
"Province/city": "ALBAY"
|
21 |
-
},
|
22 |
-
"Land Reference": {
|
23 |
-
"Owner": "",
|
24 |
-
"OCT/TCT/CLOA NO.": "",
|
25 |
-
"Lot No.": "",
|
26 |
-
"Survey No.": "3096-P",
|
27 |
-
"Blk No.": "",
|
28 |
-
"TD/ARP No.:": "",
|
29 |
-
"Area": "+6647 sq.m."
|
30 |
-
},
|
31 |
-
"Property Appraisal": {
|
32 |
-
"Kind of Bldg": "",
|
33 |
-
"Structural Type": "M-C",
|
34 |
-
"Bldg. Permit No.": "",
|
35 |
-
"Date Issued": "",
|
36 |
-
"Condominium Certificate of Title(CCT)": "",
|
37 |
-
"Certificate of Completion Issued on": "",
|
38 |
-
"Certificate of Occupancy Issued on": "",
|
39 |
-
"Date Constructed/Completed": "",
|
40 |
-
"Date Occupied": "",
|
41 |
-
"Bldg. Age": "",
|
42 |
-
"No. of Storeys": "",
|
43 |
-
"Area of 1st Flr": "-||-",
|
44 |
-
"Area of 2nd Flr": "-||-",
|
45 |
-
"Area of 3rd Flr": "-||-",
|
46 |
-
"Area of 4th Flr": "-||-",
|
47 |
-
"Total Floor Area": "12 sq.m."
|
48 |
-
}
|
49 |
-
},
|
50 |
-
{
|
51 |
-
"File Name": "DARAGA-ALCALA-0017",
|
52 |
-
"General Information": {
|
53 |
-
"ARP No.": "2017-04-0000-00009",
|
54 |
-
"Owner": "Demetrio Madrona",
|
55 |
-
"Address": "",
|
56 |
-
"Tel No.": "",
|
57 |
-
"Administrator/Beneficial User": "",
|
58 |
-
"Address:": "",
|
59 |
-
"Tel No.:": "",
|
60 |
-
"PIN": "0310 400 301008-1001",
|
61 |
-
"TIN_1": "",
|
62 |
-
"TIN_2": ""
|
63 |
-
},
|
64 |
-
"Building Location": {
|
65 |
-
"No. / Street": "",
|
66 |
-
"Brgy/District": "Alcala Daraga",
|
67 |
-
"Municipality": "Albay",
|
68 |
-
"Province/city": "Albay"
|
69 |
-
},
|
70 |
-
"Land Reference": {
|
71 |
-
"Owner": "",
|
72 |
-
"OCT/TCT/CLOA NO.": "",
|
73 |
-
"Lot No.": "",
|
74 |
-
"Survey No.": "300",
|
75 |
-
"Blk No.": "",
|
76 |
-
"TD/ARP No.:": "",
|
77 |
-
"Area": "6 SQ.M."
|
78 |
-
},
|
79 |
-
"Property Appraisal": {
|
80 |
-
"Kind of Bldg": "",
|
81 |
-
"Structural Type": "V",
|
82 |
-
"Bldg. Permit No.": "",
|
83 |
-
"Date Issued": "",
|
84 |
-
"Condominium Certificate of Title(CCT)": "",
|
85 |
-
"Certificate of Completion Issued on": "",
|
86 |
-
"Certificate of Occupancy Issued on": "",
|
87 |
-
"Date Constructed/Completed": "",
|
88 |
-
"Date Occupied": "",
|
89 |
-
"Bldg. Age": "",
|
90 |
-
"No. of Storeys": "",
|
91 |
-
"Area of 1st Flr": "",
|
92 |
-
"Area of 2nd Flr": "",
|
93 |
-
"Area of 3rd Flr": "",
|
94 |
-
"Area of 4th Flr": "",
|
95 |
-
"Total Floor Area": "6 SQ.M."
|
96 |
-
}
|
97 |
-
},
|
98 |
-
{
|
99 |
-
"File Name": "DARAGA-ALCALA-0019",
|
100 |
-
"General Information": {
|
101 |
-
"ARP No.": "01 04-00 000 0",
|
102 |
-
"Owner": "DEMETRIO MADRONA",
|
103 |
-
"Address": "",
|
104 |
-
"Tel No.": "",
|
105 |
-
"Administrator/Beneficial User": "",
|
106 |
-
"Address:": "",
|
107 |
-
"Tel No.:": "",
|
108 |
-
"PIN": "03 400301008 -1002",
|
109 |
-
"TIN_1": "",
|
110 |
-
"TIN_2": ""
|
111 |
-
},
|
112 |
-
"Building Location": {
|
113 |
-
"No. / Street": "",
|
114 |
-
"Brgy/District": "ALCALA",
|
115 |
-
"Municipality": "DARAGA",
|
116 |
-
"Province/city": "ALBAY"
|
117 |
-
},
|
118 |
-
"Land Reference": {
|
119 |
-
"Owner": "",
|
120 |
-
"OCT/TCT/CLOA NO.": "",
|
121 |
-
"Lot No.": "",
|
122 |
-
"Survey No.": "",
|
123 |
-
"Blk No.": "",
|
124 |
-
"TD/ARP No.:": "",
|
125 |
-
"Area": "6 SQ.M."
|
126 |
-
},
|
127 |
-
"Property Appraisal": {
|
128 |
-
"Kind of Bldg": "",
|
129 |
-
"Structural Type": "IV",
|
130 |
-
"Bldg. Permit No.": "",
|
131 |
-
"Date Issued": "",
|
132 |
-
"Condominium Certificate of Title(CCT)": "",
|
133 |
-
"Certificate of Completion Issued on": "",
|
134 |
-
"Certificate of Occupancy Issued on": "",
|
135 |
-
"Date Constructed/Completed": "",
|
136 |
-
"Date Occupied": "",
|
137 |
-
"Bldg. Age": "",
|
138 |
-
"No. of Storeys": "",
|
139 |
-
"Area of 1st Flr": "",
|
140 |
-
"Area of 2nd Flr": "",
|
141 |
-
"Area of 3rd Flr": "",
|
142 |
-
"Area of 4th Flr": "",
|
143 |
-
"Total Floor Area": "6 SQ.M."
|
144 |
-
}
|
145 |
-
}
|
146 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
|
|
settings.py
CHANGED
@@ -1,13 +1,8 @@
|
|
1 |
#OpenAI Variables
|
2 |
gpt_model = "gpt-3.5-turbo"
|
3 |
|
4 |
-
#Google Variables
|
5 |
-
project_id = "advance-river-381411"
|
6 |
-
project_location = "us"
|
7 |
-
processor_id = "31bc9a6106cb3cac"
|
8 |
-
|
9 |
#Company Specific Variables
|
10 |
-
char_remove = ["N/A", "(Use additional sheet if necessary)", "(not provided)", "Not specified", "-||-", "0 sq.m."]
|
11 |
|
12 |
#Company Documents
|
13 |
RPFAAP2 = '''
|
@@ -29,9 +24,9 @@ RPFAAP2 = '''
|
|
29 |
Memoranda: -||-
|
30 |
Date of Entry in the Record of Assessment: -||-
|
31 |
Name: -||-
|
32 |
-
PIN: <Numerical Value Only
|
33 |
-
ARP No.: <Numerical Value Only
|
34 |
-
TD No.: <Numerical Value Only
|
35 |
Total Assessed Value: -||-
|
36 |
Previous Owner: <Person's Name>
|
37 |
Effectivity of Assessment: -||-
|
@@ -41,16 +36,16 @@ RPFAAP2 = '''
|
|
41 |
|
42 |
RPFAAP1 = '''
|
43 |
Desired Format:
|
44 |
-
ARP No.: <Numerical Value Only
|
45 |
-
|
46 |
Address: -||-
|
47 |
Tel No.: -||-
|
48 |
Administrator/Beneficial User: -||-
|
49 |
Address: -||-
|
50 |
Tel No.: -||-
|
51 |
-
PIN: <Numerical Value Only
|
52 |
-
TIN_1: <Numerical Value Only
|
53 |
-
TIN_2: <Numerical Value Only
|
54 |
No. / Street: -||-
|
55 |
Brgy/District: -||-
|
56 |
Municipality: -||-
|
@@ -60,10 +55,10 @@ RPFAAP1 = '''
|
|
60 |
Lot No.: -||-
|
61 |
Survey No.: -||-
|
62 |
Blk No.: -||-
|
63 |
-
TD/ARP No.: <Numerical Value Only
|
64 |
Area: <Area in SQ.M.>
|
65 |
Kind of Bldg.: -||-
|
66 |
-
Structural Type:
|
67 |
Bldg. Permit No.: -||-
|
68 |
Date Issued: -||-
|
69 |
Condominium Certificate of Title(CCT): -||-
|
@@ -78,4 +73,51 @@ RPFAAP1 = '''
|
|
78 |
Area of 3rd Flr: <Numerical Value in SQ.M.>
|
79 |
Area of 4th Flr: <Numerical Value in SQ.M.>
|
80 |
Total Floor Area: <Numerical Value in SQ.M.>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
'''
|
|
|
1 |
#OpenAI Variables
|
2 |
gpt_model = "gpt-3.5-turbo"
|
3 |
|
|
|
|
|
|
|
|
|
|
|
4 |
#Company Specific Variables
|
5 |
+
char_remove = ["N/A", "(Use additional sheet if necessary)", "(not provided)", "Not specified", "-||-", "0 sq.m.", "(not provided in text)"]
|
6 |
|
7 |
#Company Documents
|
8 |
RPFAAP2 = '''
|
|
|
24 |
Memoranda: -||-
|
25 |
Date of Entry in the Record of Assessment: -||-
|
26 |
Name: -||-
|
27 |
+
PIN: <Numerical Value Only>
|
28 |
+
ARP No.: <Numerical Value Only>
|
29 |
+
TD No.: <Numerical Value Only>
|
30 |
Total Assessed Value: -||-
|
31 |
Previous Owner: <Person's Name>
|
32 |
Effectivity of Assessment: -||-
|
|
|
36 |
|
37 |
RPFAAP1 = '''
|
38 |
Desired Format:
|
39 |
+
ARP No.: <Numerical Value Only>
|
40 |
+
OWNER: <Person's Name>
|
41 |
Address: -||-
|
42 |
Tel No.: -||-
|
43 |
Administrator/Beneficial User: -||-
|
44 |
Address: -||-
|
45 |
Tel No.: -||-
|
46 |
+
PIN: <Numerical Value Only>
|
47 |
+
TIN_1: <Numerical Value Only>
|
48 |
+
TIN_2: <Numerical Value Only>
|
49 |
No. / Street: -||-
|
50 |
Brgy/District: -||-
|
51 |
Municipality: -||-
|
|
|
55 |
Lot No.: -||-
|
56 |
Survey No.: -||-
|
57 |
Blk No.: -||-
|
58 |
+
TD/ARP No.: <Numerical Value Only>
|
59 |
Area: <Area in SQ.M.>
|
60 |
Kind of Bldg.: -||-
|
61 |
+
Structural Type: <Roman Numerals Only>
|
62 |
Bldg. Permit No.: -||-
|
63 |
Date Issued: -||-
|
64 |
Condominium Certificate of Title(CCT): -||-
|
|
|
73 |
Area of 3rd Flr: <Numerical Value in SQ.M.>
|
74 |
Area of 4th Flr: <Numerical Value in SQ.M.>
|
75 |
Total Floor Area: <Numerical Value in SQ.M.>
|
76 |
+
'''
|
77 |
+
|
78 |
+
TDRP = '''
|
79 |
+
Desired Format:
|
80 |
+
TD No.: -||-
|
81 |
+
Property Identification No.: -||-
|
82 |
+
Owner: -||-
|
83 |
+
TIN_1: -||-
|
84 |
+
Address_1: -||-
|
85 |
+
Telephone No._1: -||-
|
86 |
+
Administrator/Beneficial User: -||-
|
87 |
+
TIN_2: -||-
|
88 |
+
Address_2: -||-
|
89 |
+
Telephone No._2: -||-
|
90 |
+
Number and Street: -||-
|
91 |
+
Barangay/District: -||-
|
92 |
+
Municipality & Province/City: -||-
|
93 |
+
OCT/TCT/CLOA No.: -||-
|
94 |
+
Survey No.: -||-
|
95 |
+
CCT: -||-
|
96 |
+
Lot No.: -||-
|
97 |
+
Dated: -||-
|
98 |
+
Blk No.: -||-
|
99 |
+
North: -||-
|
100 |
+
South: -||-
|
101 |
+
East: -||-
|
102 |
+
West: -||-
|
103 |
+
Land: -||-
|
104 |
+
Building: -||-
|
105 |
+
No. of Storeys: -||-
|
106 |
+
Brief Description_1: -||-
|
107 |
+
Machinery: -||-
|
108 |
+
Brief Description_2: -||-
|
109 |
+
Others: -||-
|
110 |
+
Specify: -||-
|
111 |
+
Total Assessed Value: -||-
|
112 |
+
Taxable: -||-
|
113 |
+
QTR: -||-
|
114 |
+
Year: -||-
|
115 |
+
This declaration cancels TD No.: -||-
|
116 |
+
Owner: -||-
|
117 |
+
Previous A.V. Php: -||-
|
118 |
+
Memoranda: -||-
|
119 |
+
'''
|
120 |
+
|
121 |
+
TDRP_COORDS = '''
|
122 |
+
North: (193, 580), (600, 640)
|
123 |
'''
|