Sage
committed on
Commit
•
02dad28
1
Parent(s):
e981874
first commit
Browse files- RPFAA Building P1.json +1 -0
- RPFAAP1.json +47 -0
- RPFAAP2.json +37 -0
- advance-river-381411-c7be39c33cff.json +12 -0
- app.log +339 -0
- gr.py +11 -0
- main.py +266 -0
- output.json +146 -0
- requirements.txt +0 -0
- settings.py +94 -0
RPFAA Building P1.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"File Name": "DARAGA-ALCALA-0017", "General Information": {"ARP No.": "2017-04-0000-00009", "Owner": "RUDY MADRONA", "Address": "", "Tel No.": "", "Administrator/Beneficial User": "", "Address:": "", "Tel No.:": "", "PIN": "0310 400 301008-1001", "TIN_1": "", "TIN_2": ""}, "Building Location": {"No. / Street": "", "Brgy/District": "", "Municipality": "", "Province/city": ""}, "Land Reference": {"Owner": "", "OCT/TCT/CLOA NO.": "", "Lot No.": "", "Survey No.": "", "Blk No.": "", "TD/ARP No.:": "", "Area": ""}, "Property Appraisal": {"Kind of Bldg": "", "Structural Type": "", "Bldg. Permit No.": "", "Date Issued": "", "Condominium Certificate of Title(CCT)": "", "Certificate of Completion Issued on": "", "Certificate of Occupancy Issued on": "", "Date Constructed/Completed": "", "Date Occupied": "", "Bldg. Age": "", "No. of Storeys": "", "Area of 1st Flr": "", "Area of 2nd Flr": "", "Area of 3rd Flr": "", "Area of 4th Flr": "", "Total Floor Area": ""}}, {"File Name": "DARAGA-ALCALA-0033", "General Information": {"ARP No.": "2011-64-0003-00017", "Owner": "JULIAN LLANTOS", "Address": "", "Tel No.": "", "Administrator/Beneficial User": "", "Address:": "", "Tel No.:": "", "PIN": "0310400301012100", "TIN_1": "", "TIN_2": ""}, "Building Location": {"No. / Street": "", "Brgy/District": "", "Municipality": "", "Province/city": ""}, "Land Reference": {"Owner": "", "OCT/TCT/CLOA NO.": "", "Lot No.": "", "Survey No.": "", "Blk No.": "", "TD/ARP No.:": "", "Area": ""}, "Property Appraisal": {"Kind of Bldg": "", "Structural Type": "", "Bldg. Permit No.": "", "Date Issued": "", "Condominium Certificate of Title(CCT)": "", "Certificate of Completion Issued on": "", "Certificate of Occupancy Issued on": "", "Date Constructed/Completed": "", "Date Occupied": "", "Bldg. Age": "", "No. 
of Storeys": "", "Area of 1st Flr": "", "Area of 2nd Flr": "", "Area of 3rd Flr": "", "Area of 4th Flr": "", "Total Floor Area": ""}}, {"File Name": "DARAGA-ALCALA-0071", "General Information": {"ARP No.": "2017-04-0003-00036", "Owner": "ROLANDO LISTANA", "Address": "", "Tel No.": "", "Administrator/Beneficial User": "", "Address:": "", "Tel No.:": "", "PIN": "0310400 301 024-100", "TIN_1": "(no value provided)", "TIN_2": "(no value provided)"}, "Building Location": {"No. / Street": "", "Brgy/District": "", "Municipality": "", "Province/city": ""}, "Land Reference": {"Owner": "", "OCT/TCT/CLOA NO.": "", "Lot No.": "", "Survey No.": "", "Blk No.": "", "TD/ARP No.:": "", "Area": ""}, "Property Appraisal": {"Kind of Bldg": "", "Structural Type": "", "Bldg. Permit No.": "", "Date Issued": "", "Condominium Certificate of Title(CCT)": "", "Certificate of Completion Issued on": "", "Certificate of Occupancy Issued on": "", "Date Constructed/Completed": "", "Date Occupied": "", "Bldg. Age": "", "No. of Storeys": "", "Area of 1st Flr": "", "Area of 2nd Flr": "", "Area of 3rd Flr": "", "Area of 4th Flr": "", "Total Floor Area": ""}}]
|
RPFAAP1.json
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"File Name": "%s",
|
2 |
+
"General Information": {
|
3 |
+
"ARP No.": "",
|
4 |
+
"Owner": "",
|
5 |
+
"Address": "",
|
6 |
+
"Tel No.": "",
|
7 |
+
"Administrator/Beneficial User": "",
|
8 |
+
"Address:": "",
|
9 |
+
"Tel No.:": "",
|
10 |
+
"PIN": "",
|
11 |
+
"TIN_1": "",
|
12 |
+
"TIN_2": ""
|
13 |
+
},
|
14 |
+
"Building Location": {
|
15 |
+
"No. / Street": "",
|
16 |
+
"Brgy/District": "",
|
17 |
+
"Municipality": "",
|
18 |
+
"Province/city": ""
|
19 |
+
},
|
20 |
+
"Land Reference": {
|
21 |
+
"Owner": "",
|
22 |
+
"OCT/TCT/CLOA NO.": "",
|
23 |
+
"Lot No.": "",
|
24 |
+
"Survey No.": "",
|
25 |
+
"Blk No.": "",
|
26 |
+
"TD/ARP No.:": "",
|
27 |
+
"Area": ""
|
28 |
+
},
|
29 |
+
"Property Appraisal": {
|
30 |
+
"Kind of Bldg": "",
|
31 |
+
"Structural Type": "",
|
32 |
+
"Bldg. Permit No.": "",
|
33 |
+
"Date Issued": "",
|
34 |
+
"Condominium Certificate of Title(CCT)": "",
|
35 |
+
"Certificate of Completion Issued on": "",
|
36 |
+
"Certificate of Occupancy Issued on": "",
|
37 |
+
"Date Constructed/Completed": "",
|
38 |
+
"Date Occupied": "",
|
39 |
+
"Bldg. Age": "",
|
40 |
+
"No. of Storeys": "",
|
41 |
+
"Area of 1st Flr": "",
|
42 |
+
"Area of 2nd Flr": "",
|
43 |
+
"Area of 3rd Flr": "",
|
44 |
+
"Area of 4th Flr": "",
|
45 |
+
"Total Floor Area": ""
|
46 |
+
}
|
47 |
+
}
|
RPFAAP2.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"File Name": "%s",
|
2 |
+
"Additional Items": {
|
3 |
+
"Additional Items": ""
|
4 |
+
},
|
5 |
+
"Property Appraisal": {
|
6 |
+
"Unit Construction Cost (P)": "",
|
7 |
+
"Sub-Total (P)": "",
|
8 |
+
"Depreciation Rate": "",
|
9 |
+
"Depreciation Cost (P)": "",
|
10 |
+
"Cost of Additional Items": "",
|
11 |
+
"Total Construction Cost": "",
|
12 |
+
"Total Percentage Depreciation": "",
|
13 |
+
"Market Value (P)": ""
|
14 |
+
},
|
15 |
+
"Property Assesment": {
|
16 |
+
"Actual Use": "",
|
17 |
+
"Market Value (P)": "",
|
18 |
+
"Assessment Level": "",
|
19 |
+
"Assessed Value (P)": "",
|
20 |
+
"Effectivity of Assessment/Reassessment": ""
|
21 |
+
},
|
22 |
+
"Memoranda": {
|
23 |
+
"Memoranda": "",
|
24 |
+
"Date of Entry in the Record of Assessment": "",
|
25 |
+
"Name": ""
|
26 |
+
},
|
27 |
+
"Record of Superseded Assesment": {
|
28 |
+
"PIN": "",
|
29 |
+
"ARP No.": "",
|
30 |
+
"TD No.": "",
|
31 |
+
"Total Assessed Value": "",
|
32 |
+
"Previous Owner": "",
|
33 |
+
"Effectivity of Assessment": "",
|
34 |
+
"Record Person": "",
|
35 |
+
"Date": ""
|
36 |
+
}
|
37 |
+
}
|
advance-river-381411-c7be39c33cff.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"type": "service_account",
|
3 |
+
"project_id": "advance-river-381411",
|
4 |
+
"private_key_id": "c7be39c33cff4f7d73235768c604b45d34b70828",
|
5 |
+
"private_key": "REDACTED - never commit service-account keys; rotate the exposed key and supply it via a secret manager or environment",
|
6 |
+
"client_email": "test-983@advance-river-381411.iam.gserviceaccount.com",
|
7 |
+
"client_id": "117808812221688451309",
|
8 |
+
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
9 |
+
"token_uri": "https://oauth2.googleapis.com/token",
|
10 |
+
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
11 |
+
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/test-983%40advance-river-381411.iam.gserviceaccount.com"
|
12 |
+
}
|
app.log
ADDED
@@ -0,0 +1,339 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2023-05-11 19:13:33,773 - INFO - The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels of the desired format, simply extract the text, correct it and return only the desired format. Text: ARP No. 2017-04-0000-00009
|
2 |
+
OWNER: RUDY MADRONA
|
3 |
+
Address: ALCALA DARAGA, ALBAY
|
4 |
+
Tel No.:
|
5 |
+
Administrator/Beneficial User:
|
6 |
+
Address:
|
7 |
+
Tel No.:
|
8 |
+
BUILDING LOCATION
|
9 |
+
No. / Street
|
10 |
+
Brgy/District
|
11 |
+
ALCALA
|
12 |
+
DA RAGA
|
13 |
+
Municipality:
|
14 |
+
Province/City
|
15 |
+
ALBAY
|
16 |
+
PROPERTY APPRAISAL
|
17 |
+
Kind of Bldg.
|
18 |
+
Structural Type V
|
19 |
+
Bldg. Permit No.
|
20 |
+
Date Issued
|
21 |
+
Condominium Certificate of Title(CCT)
|
22 |
+
Certificate of Completion Issued On:
|
23 |
+
Certificate of Occupancy Issued On:
|
24 |
+
Date Constructed/Completed:
|
25 |
+
Date Occupied:
|
26 |
+
REAL PROPERTY FIELD APPRAISAL & ASSESSMENT SHEET - BUILDING & OTHER
|
27 |
+
STRUCTURES
|
28 |
+
PIN 0310 400 301008 -1001
|
29 |
+
TIN
|
30 |
+
Tiles
|
31 |
+
STRUCTURAL MATERIALS (Checklist)
|
32 |
+
ROOF
|
33 |
+
Reinforced Concrete
|
34 |
+
G.I. Sheet
|
35 |
+
Aluminum
|
36 |
+
Asbestos
|
37 |
+
Long Span
|
38 |
+
Concrete Desk
|
39 |
+
Nipa/Anahaw/Gogon
|
40 |
+
Others (Specify)
|
41 |
+
FLOORING
|
42 |
+
Reinforced
|
43 |
+
Concrete
|
44 |
+
(for upper
|
45 |
+
floor)
|
46 |
+
Plain Cement
|
47 |
+
TIN
|
48 |
+
Floor Plan:
|
49 |
+
Attach the building plan sketch of floor plan. A photograph may also be attached if necessary.
|
50 |
+
Marble
|
51 |
+
Wood
|
52 |
+
Tiles
|
53 |
+
Others
|
54 |
+
(specify)
|
55 |
+
LAND REFERENCE
|
56 |
+
Owner
|
57 |
+
OCT/TCT/CLOA No.
|
58 |
+
Lot No.
|
59 |
+
MADRONA, DEMETRIO
|
60 |
+
TD/ARP No:
|
61 |
+
Area
|
62 |
+
Bldg. Age
|
63 |
+
No. of Storeys
|
64 |
+
Area of 1st flr:
|
65 |
+
Area of 2nd flr:
|
66 |
+
Area of 3rd flr:
|
67 |
+
Area of 4th flr:
|
68 |
+
TRANSACTION CODE
|
69 |
+
Total Floor Area: 6 SQ.M.
|
70 |
+
1st 2nd 3rd 4th
|
71 |
+
Flr. Flr. Flr. Flr.
|
72 |
+
Plain
|
73 |
+
Cement
|
74 |
+
Wood
|
75 |
+
Walls & 1st 2nd 3rd 4th
|
76 |
+
Partitions Flr. Flr. Flr. Flr.
|
77 |
+
Reinforced
|
78 |
+
Concrete
|
79 |
+
CHB
|
80 |
+
G.I Sheet
|
81 |
+
Survey No. 300
|
82 |
+
Blk No.
|
83 |
+
Build-a-
|
84 |
+
wall
|
85 |
+
Sawali
|
86 |
+
Bamboo
|
87 |
+
Others
|
88 |
+
(Specify)
|
89 |
+
|
90 |
+
Desired Format:
|
91 |
+
ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
|
92 |
+
Owner: <Person's Name>
|
93 |
+
Address: -||-
|
94 |
+
Tel No.: -||-
|
95 |
+
Administrator/Beneficial User: -||-
|
96 |
+
Address: -||-
|
97 |
+
Tel No.: -||-
|
98 |
+
PIN: <Numerical Value Only, Replace Slashes with the number 1>
|
99 |
+
TIN_1: <Numerical Value Only, Replace Slashes with the number 1>
|
100 |
+
TIN_2: <Numerical Value Only, Replace Slashes with the number 1>
|
101 |
+
|
102 |
+
2023-05-11 19:13:37,266 - INFO - ARP No.: 2017-04-0000-00009
|
103 |
+
Owner: RUDY MADRONA
|
104 |
+
Address: ALCALA DARAGA, ALBAY
|
105 |
+
Tel No.:
|
106 |
+
Administrator/Beneficial User: -||-
|
107 |
+
Address: -||-
|
108 |
+
Tel No.: -||-
|
109 |
+
PIN: 0310 400 301008-1001
|
110 |
+
TIN_1: -||-
|
111 |
+
TIN_2: -||-
|
112 |
+
2023-05-11 19:13:38,952 - INFO - The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels of the desired format, simply extract the text, correct it and return only the desired format. Text: ARP Np. 2011-64-0003-00017
|
113 |
+
OWNER: LLANTOS, JULIAN
|
114 |
+
Address: ALCALA, DARAGA, AMBAY
|
115 |
+
Tel No.:
|
116 |
+
Administrator/Beneficial User:
|
117 |
+
Address:
|
118 |
+
Tel No.:
|
119 |
+
BUILDING LOCATION
|
120 |
+
No. / Street
|
121 |
+
Brgy/District
|
122 |
+
ALCALA
|
123 |
+
DARAGA
|
124 |
+
ALBAY
|
125 |
+
Municipality:
|
126 |
+
Province/City
|
127 |
+
PROPERTY APPRAISAL
|
128 |
+
Kind of Bldg.
|
129 |
+
Structural Type 111-C
|
130 |
+
Bldg. Permit No.
|
131 |
+
Date Issued
|
132 |
+
Condominium Certificate of Title(CCT)
|
133 |
+
Certificate of Completion Issued On:
|
134 |
+
Certificate of Occupancy Issued On:
|
135 |
+
REAL PROPERTY FIELD APPRAISAL & ASSESSMENT SHEET - BUILDING & OTHER
|
136 |
+
STRUCTURES
|
137 |
+
PIN 0310 400301012-100)
|
138 |
+
TIN
|
139 |
+
Date Constructed/Completed:
|
140 |
+
Date Occupied:
|
141 |
+
STRUCTURAL MATERIALS (Checklist)
|
142 |
+
Tiles
|
143 |
+
ROOF
|
144 |
+
Reinforced Concrete
|
145 |
+
G.I. Sheet
|
146 |
+
Aluminum
|
147 |
+
Asbestos
|
148 |
+
Long Span
|
149 |
+
Concrete Desk
|
150 |
+
Nipa/Anahaw/Gogon
|
151 |
+
Others (Specify)
|
152 |
+
Floor Plan:
|
153 |
+
Attach the building plan sketch of floor plan. A photograph may also be attached if necessary.
|
154 |
+
FLOORING
|
155 |
+
Reinforced
|
156 |
+
Concrete
|
157 |
+
(for upper
|
158 |
+
floor)
|
159 |
+
Plain Cement
|
160 |
+
Marble
|
161 |
+
Wood
|
162 |
+
Tiles
|
163 |
+
TIN
|
164 |
+
Others
|
165 |
+
(specify)
|
166 |
+
LAND REFERENCE
|
167 |
+
Owner
|
168 |
+
OCT/TCT/CLOA No.
|
169 |
+
Lot No. 3095-P
|
170 |
+
TD/ARP No:
|
171 |
+
Area
|
172 |
+
Bldg. Age
|
173 |
+
No. of Storeys
|
174 |
+
Area of 1st flr:
|
175 |
+
Area of 2nd flr:
|
176 |
+
Area of 3rd flr:
|
177 |
+
Area of 4th flr:
|
178 |
+
Total Floor Area: 49 SQ.M.
|
179 |
+
1st 2nd 3rd 4th
|
180 |
+
Flr. Flr. Flr. Flr.
|
181 |
+
TRANSACTION CODE
|
182 |
+
Walls &
|
183 |
+
Partitions
|
184 |
+
Reinforced
|
185 |
+
Concrete
|
186 |
+
Plain
|
187 |
+
Cement
|
188 |
+
Wood
|
189 |
+
CHB
|
190 |
+
G.I Sheet
|
191 |
+
Build-a-
|
192 |
+
wall
|
193 |
+
Sawali
|
194 |
+
Bamboo
|
195 |
+
Others
|
196 |
+
(Specify)
|
197 |
+
Survey No.
|
198 |
+
Bik No.
|
199 |
+
1st 2nd 3rd 4th
|
200 |
+
Flr. Fir. Flr. Flr.
|
201 |
+
|
202 |
+
Desired Format:
|
203 |
+
ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
|
204 |
+
Owner: <Person's Name>
|
205 |
+
Address: -||-
|
206 |
+
Tel No.: -||-
|
207 |
+
Administrator/Beneficial User: -||-
|
208 |
+
Address: -||-
|
209 |
+
Tel No.: -||-
|
210 |
+
PIN: <Numerical Value Only, Replace Slashes with the number 1>
|
211 |
+
TIN_1: <Numerical Value Only, Replace Slashes with the number 1>
|
212 |
+
TIN_2: <Numerical Value Only, Replace Slashes with the number 1>
|
213 |
+
|
214 |
+
2023-05-11 19:13:42,282 - INFO - ARP No.: 2011-64-0003-00017
|
215 |
+
Owner: JULIAN LLANTOS
|
216 |
+
Address: ALCALA, DARAGA, ALBAY
|
217 |
+
Tel No.: -||-
|
218 |
+
Administrator/Beneficial User: -||-
|
219 |
+
Address: -||-
|
220 |
+
Tel No.: -||-
|
221 |
+
PIN: 0310400301012100
|
222 |
+
TIN_1: -||-
|
223 |
+
TIN_2: -||-
|
224 |
+
2023-05-11 19:13:44,006 - INFO - The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels of the desired format, simply extract the text, correct it and return only the desired format. Text: ARP No. 2017-04-0003-00036
|
225 |
+
OWNER: LISTANA, ROLANDO
|
226 |
+
Address: ALCAL� DARAGA, ALBAY
|
227 |
+
Tel No.:
|
228 |
+
Administrator/Beneficial User:
|
229 |
+
Address:
|
230 |
+
Tel No.:
|
231 |
+
BUILDING LOCATION
|
232 |
+
No. / Street
|
233 |
+
Brgy/District
|
234 |
+
Municipality:
|
235 |
+
ALCALA
|
236 |
+
Province/City
|
237 |
+
REAL PROPERTY FIELD APPRAISAL & ASSESSMENT SHEET - BUILDING & OTHER
|
238 |
+
STRUCTURES
|
239 |
+
DARALA
|
240 |
+
ALBAY
|
241 |
+
PROPERTY APPRAISAL
|
242 |
+
Kind of Bldg.
|
243 |
+
Structural Type V
|
244 |
+
Bldg. Permit No.
|
245 |
+
Date Issued
|
246 |
+
Condominium Certificate of Title(CCT)
|
247 |
+
Certificate of Completion Issued On:
|
248 |
+
Certificate of Occupancy Issued On:
|
249 |
+
Date Constructed/Completed: 1980
|
250 |
+
Date Occupied:
|
251 |
+
Tiles
|
252 |
+
STRUCTURAL MATERIALS (Checklist)
|
253 |
+
ROOF
|
254 |
+
Reinforced Concrete
|
255 |
+
G.I. Sheet
|
256 |
+
Aluminum
|
257 |
+
Asbestos
|
258 |
+
Long Span
|
259 |
+
Concrete Desk
|
260 |
+
Nipa/Anahaw/Gogon
|
261 |
+
Others (Specify)
|
262 |
+
FLOORING
|
263 |
+
Reinforced
|
264 |
+
Concrete
|
265 |
+
(for upper
|
266 |
+
floor)
|
267 |
+
Plain Cement
|
268 |
+
Marble
|
269 |
+
Wood
|
270 |
+
Tiles
|
271 |
+
Others
|
272 |
+
(specify)
|
273 |
+
PIN 0310400 301 024 -100
|
274 |
+
TIN
|
275 |
+
TIN
|
276 |
+
LAND REFERENCE
|
277 |
+
Owner
|
278 |
+
LISTANA
|
279 |
+
OCT/TCT/CLOA No.
|
280 |
+
Lot No.
|
281 |
+
Floor Plan:
|
282 |
+
Attach the building plan sketch of floor plan. A photograph may also be attached if necessary.
|
283 |
+
TD/ARP No:
|
284 |
+
Area
|
285 |
+
I
|
286 |
+
2798
|
287 |
+
Bldg. Age
|
288 |
+
No. of Storeys
|
289 |
+
Area of 1st flr:
|
290 |
+
Area of 2nd flr:
|
291 |
+
Area of 3rd flr:
|
292 |
+
Area of 4th flr:
|
293 |
+
TRANSACTION CODE
|
294 |
+
Total Floor Area: 125Q-m�
|
295 |
+
1st 2nd 3rd 4th
|
296 |
+
Flr. Flr. Flr. Flr.
|
297 |
+
MARIAND
|
298 |
+
*
|
299 |
+
Walls & 1st 2nd 3rd 4th
|
300 |
+
Partitions Flr. Flr. Flr. Flr.
|
301 |
+
Reinforced
|
302 |
+
Concrete
|
303 |
+
Plain
|
304 |
+
Cement
|
305 |
+
Wood
|
306 |
+
CHB
|
307 |
+
G.1 Sheet
|
308 |
+
Survey No. 4684
|
309 |
+
Blk No.
|
310 |
+
Build-a-
|
311 |
+
wall
|
312 |
+
Sawali
|
313 |
+
Bamboo
|
314 |
+
Others
|
315 |
+
(Specify)
|
316 |
+
|
317 |
+
Desired Format:
|
318 |
+
ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
|
319 |
+
Owner: <Person's Name>
|
320 |
+
Address: -||-
|
321 |
+
Tel No.: -||-
|
322 |
+
Administrator/Beneficial User: -||-
|
323 |
+
Address: -||-
|
324 |
+
Tel No.: -||-
|
325 |
+
PIN: <Numerical Value Only, Replace Slashes with the number 1>
|
326 |
+
TIN_1: <Numerical Value Only, Replace Slashes with the number 1>
|
327 |
+
TIN_2: <Numerical Value Only, Replace Slashes with the number 1>
|
328 |
+
|
329 |
+
2023-05-11 19:13:48,089 - INFO - ARP No.: 2017-04-0003-00036
|
330 |
+
Owner: ROLANDO LISTANA
|
331 |
+
Address: ALCALA DARAGA, ALBAY
|
332 |
+
Tel No.: -||-
|
333 |
+
Administrator/Beneficial User: -||-
|
334 |
+
Address: -||-
|
335 |
+
Tel No.: -||-
|
336 |
+
PIN: 0310400 301 024-100
|
337 |
+
TIN_1: -||- (no value provided)
|
338 |
+
TIN_2: -||- (no value provided)
|
339 |
+
2023-05-11 19:13:48,089 - INFO - [{'File Name': 'DARAGA-ALCALA-0017', 'General Information': {'ARP No.': '2017-04-0000-00009', 'Owner': 'RUDY MADRONA', 'Address': '', 'Tel No.': '', 'Administrator/Beneficial User': '', 'Address:': '', 'Tel No.:': '', 'PIN': '0310 400 301008-1001', 'TIN_1': '', 'TIN_2': ''}, 'Building Location': {'No. / Street': '', 'Brgy/District': '', 'Municipality': '', 'Province/city': ''}, 'Land Reference': {'Owner': '', 'OCT/TCT/CLOA NO.': '', 'Lot No.': '', 'Survey No.': '', 'Blk No.': '', 'TD/ARP No.:': '', 'Area': ''}, 'Property Appraisal': {'Kind of Bldg': '', 'Structural Type': '', 'Bldg. Permit No.': '', 'Date Issued': '', 'Condominium Certificate of Title(CCT)': '', 'Certificate of Completion Issued on': '', 'Certificate of Occupancy Issued on': '', 'Date Constructed/Completed': '', 'Date Occupied': '', 'Bldg. Age': '', 'No. of Storeys': '', 'Area of 1st Flr': '', 'Area of 2nd Flr': '', 'Area of 3rd Flr': '', 'Area of 4th Flr': '', 'Total Floor Area': ''}}, {'File Name': 'DARAGA-ALCALA-0033', 'General Information': {'ARP No.': '2011-64-0003-00017', 'Owner': 'JULIAN LLANTOS', 'Address': '', 'Tel No.': '', 'Administrator/Beneficial User': '', 'Address:': '', 'Tel No.:': '', 'PIN': '0310400301012100', 'TIN_1': '', 'TIN_2': ''}, 'Building Location': {'No. / Street': '', 'Brgy/District': '', 'Municipality': '', 'Province/city': ''}, 'Land Reference': {'Owner': '', 'OCT/TCT/CLOA NO.': '', 'Lot No.': '', 'Survey No.': '', 'Blk No.': '', 'TD/ARP No.:': '', 'Area': ''}, 'Property Appraisal': {'Kind of Bldg': '', 'Structural Type': '', 'Bldg. Permit No.': '', 'Date Issued': '', 'Condominium Certificate of Title(CCT)': '', 'Certificate of Completion Issued on': '', 'Certificate of Occupancy Issued on': '', 'Date Constructed/Completed': '', 'Date Occupied': '', 'Bldg. Age': '', 'No. 
of Storeys': '', 'Area of 1st Flr': '', 'Area of 2nd Flr': '', 'Area of 3rd Flr': '', 'Area of 4th Flr': '', 'Total Floor Area': ''}}, {'File Name': 'DARAGA-ALCALA-0071', 'General Information': {'ARP No.': '2017-04-0003-00036', 'Owner': 'ROLANDO LISTANA', 'Address': '', 'Tel No.': '', 'Administrator/Beneficial User': '', 'Address:': '', 'Tel No.:': '', 'PIN': '0310400 301 024-100', 'TIN_1': '(no value provided)', 'TIN_2': '(no value provided)'}, 'Building Location': {'No. / Street': '', 'Brgy/District': '', 'Municipality': '', 'Province/city': ''}, 'Land Reference': {'Owner': '', 'OCT/TCT/CLOA NO.': '', 'Lot No.': '', 'Survey No.': '', 'Blk No.': '', 'TD/ARP No.:': '', 'Area': ''}, 'Property Appraisal': {'Kind of Bldg': '', 'Structural Type': '', 'Bldg. Permit No.': '', 'Date Issued': '', 'Condominium Certificate of Title(CCT)': '', 'Certificate of Completion Issued on': '', 'Certificate of Occupancy Issued on': '', 'Date Constructed/Completed': '', 'Date Occupied': '', 'Bldg. Age': '', 'No. of Storeys': '', 'Area of 1st Flr': '', 'Area of 2nd Flr': '', 'Area of 3rd Flr': '', 'Area of 4th Flr': '', 'Total Floor Area': ''}}]
|
gr.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import gradio as gr
|
3 |
+
|
4 |
+
def save_json(text):
    """Write ``text`` as JSON to ``output.json`` in the working directory.

    Args:
        text: Any JSON-serializable value (the Gradio UI passes a string).

    Returns:
        The path of the written file, ``"output.json"``.
    """
    serialized = json.dumps(text)
    with open("output.json", "w") as handle:
        handle.write(serialized)
    return "output.json"
|
9 |
+
|
10 |
+
# Expose save_json through a minimal Gradio UI: a text box in, a downloadable
# file out. The callback must be the function defined in this module; the
# name save_text_as_json does not exist here and raised NameError on import.
demo = gr.Interface(save_json, "text", "file")
demo.launch()
|
main.py
ADDED
@@ -0,0 +1,266 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import openai
|
3 |
+
import gradio as gr
|
4 |
+
import requests
|
5 |
+
import datetime
|
6 |
+
from io import BytesIO
|
7 |
+
from google.api_core.client_options import ClientOptions
|
8 |
+
from google.cloud import documentai_v1 as documentai
|
9 |
+
import json
|
10 |
+
from google.cloud import vision
|
11 |
+
import time
|
12 |
+
from settings import char_remove, gpt_api_key, gpt_model, RPFAAP2, RPFAAP1, project_id, project_location, processor_id
|
13 |
+
from tqdm import tqdm
|
14 |
+
import logging
|
15 |
+
|
16 |
+
# Send INFO-and-above records to app.log, each prefixed with a timestamp and
# the level name. NOTE(review): the chat_gpt_* functions in this file log the
# full prompt and model reply, so app.log ends up containing the scanned
# document text - confirm that file is meant to be kept out of version control.
logging.basicConfig(filename='app.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
17 |
+
|
18 |
+
def chat_gpt_image(content, context):
    """Ask the chat model to correct OCR text extracted from an image.

    Args:
        content: The OCR text to be corrected (string).
        context: Optional extra instruction supplied by the caller; the empty
            string means "no extra instruction".

    Returns:
        The message content of the first completion choice.
    """
    openai.api_key = gpt_api_key
    prompt = "You are an expert at identifying OCR errors and correcting them with the help of context, intuition and logic."
    document = "The following text was scanned using OCR, your goal is to return a corrected version of the text"
    prefix = "Additionally"

    # The base instruction is always sent. Extra context is appended after the
    # text behind "Additionally", the same layout chat_gpt_document uses, so
    # the model is still told what to do with the text when context is given.
    if context == "":
        sequence = (document, content)
    else:
        sequence = (document, content, prefix, context)

    final_content = " ".join(sequence)
    logging.info(final_content)
    completion = openai.ChatCompletion.create(
        model=gpt_model,
        user="1",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": final_content}
        ]
    )
    corrected = completion.choices[0].message.content
    logging.info(corrected)
    return corrected
|
42 |
+
|
43 |
+
def remove_na(string):
    """Return ``string`` with every entry of ``char_remove`` deleted.

    The entries are applied one after another, in the order ``char_remove``
    yields them, each removal operating on the result of the previous one.
    """
    cleaned = string
    for unwanted in char_remove:
        cleaned = cleaned.replace(unwanted, "")
    return cleaned
|
47 |
+
|
48 |
+
def _fill_template(property_info, input_string):
    """Copy ``label: value`` lines from the model reply into the template.

    The templates contain the same label more than once: either in different
    sections (e.g. "Owner") or, inside one section, with a trailing colon on
    the later occurrence ("Address" / "Address:"). Labels are therefore
    matched positionally: each reply line fills the first slot for its label
    that has not been filled yet, trying the bare label first and then the
    label with a trailing colon. If every slot is already used, the first
    exact match is overwritten.

    Args:
        property_info: Template dict loaded from JSON; modified in place.
        input_string: Newline-separated model reply.
    """
    # Only nested sections can receive values; "File Name" maps to a plain
    # string and must not be probed with ``in`` / item assignment.
    sections = [
        (name, section)
        for name, section in property_info.items()
        if isinstance(section, dict)
    ]
    filled = set()

    for line in input_string.split('\n'):
        if ':' not in line:
            continue
        key, value = line.split(':', 1)
        key = key.strip()
        value = value.strip()

        if key == "File Name":
            property_info[key] = value
            continue

        target = None
        # Splitting on the first ':' strips the colon that some template keys
        # carry, so the colon-suffixed spelling is tried as well.
        for candidate in (key, key + ":"):
            for name, section in sections:
                if candidate in section and (name, candidate) not in filled:
                    target = (name, candidate)
                    break
            if target is not None:
                break

        if target is None:
            # Every slot for this label is taken: overwrite the first match.
            for name, section in sections:
                if key in section:
                    target = (name, key)
                    break

        if target is not None:
            property_info[target[0]][target[1]] = value
            filled.add(target)


def chat_gpt_document(content, document_type, context):
    """Extract form fields from OCR text via the chat model and return JSON.

    Args:
        content: Pair ``(ocr_text, file_name)`` as returned by
            ``detect_document``.
        document_type: "RPFAA Building P1" or "RPFAA Building P2"; selects the
            JSON template file and the desired-format prompt text.
        context: Optional extra instruction; the empty string means none.

    Returns:
        The filled template serialized with ``json.dumps(..., indent=4)``.

    Raises:
        OSError: from ``open`` when the template file cannot be opened.
            NOTE(review): an unrecognized ``document_type`` leaves the
            template path empty, so this is raised only after the model call.
    """
    openai.api_key = gpt_api_key
    prompt = "You are an expert at identifying OCR errors and correcting them with the help of context, intuition and logic."
    document_prefix = "The following text was scanned using OCR, your goal is to extract the important entities from the text and correct them with the help of the restrictions placed in the desired format. Remember to not make any changes on the labels of the desired format, simply extract the text, correct it and return only the desired format. Text:"
    additional_prefix = "Additionally the text"

    content_info = content[0]
    content_name = content[1]
    if document_type == "RPFAA Building P1":
        document = "RPFAAP1.json"
        desired_format = RPFAAP1
    elif document_type == "RPFAA Building P2":
        document = "RPFAAP2.json"
        desired_format = RPFAAP2
    else:
        document = ""
        desired_format = ""

    if context == "":
        sequence_1 = (document_prefix, content_info, desired_format)
    else:
        sequence_1 = (document_prefix, content_info, desired_format, additional_prefix, context)

    content_1 = " ".join(sequence_1)
    logging.info(content_1)

    completion_1 = openai.ChatCompletion.create(
        model=gpt_model,
        user="1",
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": content_1}
        ]
    )
    reply = completion_1.choices[0].message.content
    logging.info(reply)
    input_string = remove_na(reply)

    with open(document) as f:
        property_info = json.load(f)

    # Adds the name of the file
    property_info["File Name"] = content_name
    # Fills in the information
    _fill_template(property_info, input_string)
    return json.dumps(property_info, indent=4)
|
102 |
+
|
103 |
+
# def get_openai_api_usage():
|
104 |
+
# openai.api_key = os.environ["OPENAI_API_KEY"]  # key redacted: never commit secrets; rotate the exposed key
|
105 |
+
# api_key = os.environ["OPENAI_API_KEY"]  # key redacted: never commit secrets; rotate the exposed key
|
106 |
+
# org_id = "org-lqZ72EJMjCjjXdRwPNfys6YO"
|
107 |
+
# session = requests.Session()
|
108 |
+
# headers = {
|
109 |
+
# "Authorization": f"Bearer {api_key}",
|
110 |
+
# "OpenAI-Organization": org_id
|
111 |
+
# }
|
112 |
+
|
113 |
+
# # Define the start and end dates for the usage data
|
114 |
+
# today = datetime.date.today()
|
115 |
+
# start_date = today - datetime.timedelta(days=30)
|
116 |
+
# end_date = today
|
117 |
+
|
118 |
+
# # Make the API call to retrieve the usage data
|
119 |
+
# url = f"https://api.openai.com/v1/usage?date=2023-03-29"
|
120 |
+
# response = session.get(url, headers=headers)
|
121 |
+
# response.raise_for_status()
|
122 |
+
# usage_data = response.json().get("data", [])
|
123 |
+
# print(usage_data)
|
124 |
+
# for item in usage_data:
|
125 |
+
# print(f"Date: {item['aggregation_timestamp']}")
|
126 |
+
# print(f"Requests: {item['n_requests']}")
|
127 |
+
# print(f"Tokens: {item['n_context_tokens_total']}")
|
128 |
+
# print(f"Model ID: {item['n_generated_tokens_total']}")
|
129 |
+
|
130 |
+
def detect_image(content, lang):
    """Run Google Cloud Vision document text detection on an image.

    Args:
        content: Image object exposing ``save(buffer, format=...)``; it is
            re-encoded to PNG bytes before upload.
            NOTE(review): presumably a PIL image from Gradio - confirm.
        lang: Language label chosen by the caller. "Filipino" (and the legacy
            misspelling "Filpino") select the "tl" language hint; anything
            else selects "en".

    Returns:
        The full recognized text of the image.

    Raises:
        Exception: when the Vision response carries an error message.
    """
    os.environ['GOOGLE_APPLICATION_CREDENTIALS']=r'advance-river-381411-c7be39c33cff.json'
    client = vision.ImageAnnotatorClient()

    buffer = BytesIO()
    content.save(buffer, format="PNG")
    content = buffer.getvalue()

    # Accept the correct spelling as well as the historical typo so that a
    # caller passing "Filipino" does not silently fall back to English.
    if lang in ("Filpino", "Filipino"):
        hints = "tl"
    else:
        hints = "en"
    image = vision.Image(content=content)

    response = client.document_text_detection(image=image, image_context={"language_hints": [hints]})

    if response.error.message:
        raise Exception(
            '{}\nFor more info on error messages, check: '
            'https://cloud.google.com/apis/design/errors'.format(
                response.error.message))
    return(response.full_text_annotation.text)
|
151 |
+
|
152 |
+
def detect_document(content):
    """Send an uploaded file to a Google Document AI processor and return its text.

    Args:
        content: Uploaded file object whose ``name`` attribute is the path of
            the file on disk; the file is read from that path in binary mode.

    Returns:
        ``(text, name)`` where ``text`` is the processed document's text and
        ``name`` is the file's base name up to its first dot.
        If the extension is not supported, the bare string
        ``"Please upload a valid MIME type"`` is returned instead of a tuple
        (kept unchanged so existing callers see the same value).
    """
    os.environ['GOOGLE_APPLICATION_CREDENTIALS']=r'advance-river-381411-c7be39c33cff.json'
    PROJECT_ID = project_id
    LOCATION = project_location  # Format is 'us' or 'eu'
    PROCESSOR_ID = processor_id  # Create processor in Cloud Console
    content_extension = content.name.split(".")[-1]

    # Extension -> MIME type. "image/jpg" is not a registered MIME type, so
    # JPEG files are now sent as "image/jpeg"; the common ".jpeg" and ".tif"
    # spellings are accepted alongside ".jpg" and ".tiff".
    mime_types = {
        "TIFF": "image/tiff",
        "TIF": "image/tiff",
        "PDF": "application/pdf",
        "PNG": "image/png",
        "JPG": "image/jpeg",
        "JPEG": "image/jpeg",
    }
    MIME_TYPE = mime_types.get(content_extension.upper())
    if MIME_TYPE is None:
        return("Please upload a valid MIME type")

    docai_client = documentai.DocumentProcessorServiceClient(
        client_options=ClientOptions(api_endpoint=f"{LOCATION}-documentai.googleapis.com")
    )

    RESOURCE_NAME = docai_client.processor_path(PROJECT_ID, LOCATION, PROCESSOR_ID)

    with open(content.name, "rb") as image:
        image_content = image.read()

    raw_document = documentai.RawDocument(content=image_content, mime_type=MIME_TYPE)

    request = documentai.ProcessRequest(name=RESOURCE_NAME, raw_document=raw_document)

    result = docai_client.process_document(request=request)

    document_object = result.document

    # Take the base name regardless of path separator. Splitting only on "\\"
    # left the whole directory path in the name on POSIX systems.
    name = content.name.replace('\\', '/').split('/')[-1]
    name = name.split('.')[0]

    return(document_object.text, name)
|
191 |
+
|
192 |
+
def image(content, lang, context):
    """OCR an image with ``detect_image`` and hand the text to ``chat_gpt_image``.

    Args:
        content: Image forwarded unchanged to ``detect_image``.
        lang: Language label forwarded unchanged to ``detect_image``.
        context: Extra context forwarded unchanged to ``chat_gpt_image``.

    Returns:
        Whatever ``chat_gpt_image`` returns for the extracted text.
    """
    extracted_text = detect_image(content, lang)
    return chat_gpt_image(extracted_text, context)
|
194 |
+
|
195 |
+
def document(content, document_type, context):
    """Process a file with ``detect_document`` and pass the result to ``chat_gpt_document``.

    Args:
        content: Uploaded file forwarded unchanged to ``detect_document``.
        document_type: Document type forwarded to ``chat_gpt_document``.
        context: Extra context forwarded to ``chat_gpt_document``.

    Returns:
        Whatever ``chat_gpt_document`` returns.
    """
    ocr_result = detect_document(content)
    return chat_gpt_document(ocr_result, document_type, context)
|
197 |
+
|
198 |
+
def batch_document(content, document_type, context, progress=gr.Progress()):
    """Process several uploaded files and save the combined results as JSON.

    Each file is run through ``detect_document`` and ``chat_gpt_document``;
    the reply is parsed with ``json.loads`` and collected. The collected list
    is written by ``save_json`` using ``document_type`` as the file name base.

    Transient OpenAI failures (``APIConnectionError``, ``RateLimitError``) are
    retried up to ``retries`` times per file, sleeping ``timeout`` seconds
    between attempts. Previously the loop never terminated once the retry
    budget was used up and stopped sleeping, hammering the API; the retry
    counters were also shared across files instead of reset for each one.

    Args:
        content: Iterable of uploaded file objects.
        document_type: Document type passed to ``chat_gpt_document`` and used
            as the output file name (without extension).
        context: Extra context passed to ``chat_gpt_document``.
        progress: Gradio progress tracker used to report per-file progress.

    Returns:
        The path returned by ``save_json``.

    Raises:
        openai.error.APIConnectionError, openai.error.RateLimitError: When a
            file still fails after the final retry.
    """
    progress(0, desc="Starting")
    retries = 5
    timeout = 5
    combined_data = []
    for x in progress.tqdm(content, desc="Processing"):
        # Fresh retry budget for every file.
        for attempt in range(1, retries + 1):
            try:
                data = json.loads(chat_gpt_document(detect_document(x),document_type,context))
            except (openai.error.APIConnectionError, openai.error.RateLimitError) as exc:
                logging.error(f'Retry {attempt} failed: openai.error.{type(exc).__name__}')
                if attempt == retries:
                    # Out of retries: surface the failure instead of looping forever.
                    raise
                logging.error(f'Retrying in {timeout} seconds...')
                time.sleep(timeout)
            else:
                combined_data.append(data)
                break
    logging.info(combined_data)
    return save_json(combined_data, document_type)
|
225 |
+
|
226 |
+
def save_json(text, filename):
    """Write ``text`` as JSON to ``<filename>.json`` and return that path.

    Args:
        text: JSON-serializable object to write.
        filename: Output path without the ``.json`` extension.

    Returns:
        The path that was written (``filename`` with ``.json`` appended).
    """
    target_path = filename + ".json"
    with open(target_path, "w") as handle:
        json.dump(text, handle)
    return target_path
|
231 |
+
|
232 |
+
# Gradio UI: three tabs (single image, single document, batch of documents),
# each wired to its handler function.
# NOTE(review): the nesting of outputs/buttons inside Row/Column is
# reconstructed; the original indentation was not available. Confirm layout.
with gr.Blocks(title="Ottico OCR", css=".markdown {text-align: center;}") as app:
    gr.Markdown("""# Ottico OCR
    Attach Images or Files below and convert them to Text.""", elem_classes="markdown")
    with gr.Tab("Scan Image"):
        with gr.Row():
            with gr.Column():
                image_input = [gr.Image(type="pil"),
                               gr.Radio(["English", "Filipino"], label="Language", info="What is the document language? (Optional)"),
                               gr.Textbox(label="What kind of Image is this? (Optional)", placeholder="This is an image of an Official Reciept")]
            image_output = gr.Textbox(label="Result")
        image_button = gr.Button("Scan")
    with gr.Tab("Scan Document"):
        with gr.Row():
            with gr.Column():
                document_input = [gr.File(file_types=["pdf","tiff","image","text"]),
                                  gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2"], label="File Type", info="What type of document is this?"),
                                  gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
            document_output = gr.Textbox(label="Result")
        document_button = gr.Button("Scan")
    with gr.Tab("Batch Scan"):
        with gr.Row():
            with gr.Column():
                batch_document_input = [gr.File(file_types=["pdf","tiff","image","text"], file_count="multiple"),
                                        gr.Dropdown(["RPFAA Building P1", "RPFAA Building P2"], label="File Type", info="What type of document is this?"),
                                        gr.Textbox(label="Any additional information? (Optional)", placeholder="This is document is an Official Reciept")]
            batch_document_output = gr.File(label="Result")
        batch_document_button = gr.Button("Scan")

    # Each button passes its tab's input components to the matching handler.
    image_button.click(image, inputs=image_input, outputs=image_output)
    document_button.click(document, inputs=document_input, outputs=document_output)
    batch_document_button.click(batch_document, inputs=batch_document_input, outputs=batch_document_output)

app.queue()
# SECURITY: the app is exposed through a public share link, so the login must
# not be a hard-coded guessable pair. Credentials can now be supplied through
# OTTICO_AUTH_USERNAME / OTTICO_AUTH_PASSWORD; the previous values remain the
# fallback only so existing deployments keep starting. Set real credentials.
app.launch(
    share=True,
    auth=(
        os.environ.get("OTTICO_AUTH_USERNAME", "username"),
        os.environ.get("OTTICO_AUTH_PASSWORD", "password"),
    ),
)
|
output.json
ADDED
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"File Name": "DARAGA-ALCALA-0013",
|
4 |
+
"General Information": {
|
5 |
+
"ARP No.": "1017-64-0003-00007",
|
6 |
+
"Owner": "SAME",
|
7 |
+
"Address": "",
|
8 |
+
"Tel No.": "",
|
9 |
+
"Administrator/Beneficial User": "",
|
10 |
+
"Address:": "",
|
11 |
+
"Tel No.:": "",
|
12 |
+
"PIN": "03/0400301007-1001",
|
13 |
+
"TIN_1": "",
|
14 |
+
"TIN_2": ""
|
15 |
+
},
|
16 |
+
"Building Location": {
|
17 |
+
"No. / Street": "",
|
18 |
+
"Brgy/District": "ALCALA",
|
19 |
+
"Municipality": "DARAGA",
|
20 |
+
"Province/city": "ALBAY"
|
21 |
+
},
|
22 |
+
"Land Reference": {
|
23 |
+
"Owner": "",
|
24 |
+
"OCT/TCT/CLOA NO.": "",
|
25 |
+
"Lot No.": "",
|
26 |
+
"Survey No.": "3096-P",
|
27 |
+
"Blk No.": "",
|
28 |
+
"TD/ARP No.:": "",
|
29 |
+
"Area": "+6647 sq.m."
|
30 |
+
},
|
31 |
+
"Property Appraisal": {
|
32 |
+
"Kind of Bldg": "",
|
33 |
+
"Structural Type": "M-C",
|
34 |
+
"Bldg. Permit No.": "",
|
35 |
+
"Date Issued": "",
|
36 |
+
"Condominium Certificate of Title(CCT)": "",
|
37 |
+
"Certificate of Completion Issued on": "",
|
38 |
+
"Certificate of Occupancy Issued on": "",
|
39 |
+
"Date Constructed/Completed": "",
|
40 |
+
"Date Occupied": "",
|
41 |
+
"Bldg. Age": "",
|
42 |
+
"No. of Storeys": "",
|
43 |
+
"Area of 1st Flr": "-||-",
|
44 |
+
"Area of 2nd Flr": "-||-",
|
45 |
+
"Area of 3rd Flr": "-||-",
|
46 |
+
"Area of 4th Flr": "-||-",
|
47 |
+
"Total Floor Area": "12 sq.m."
|
48 |
+
}
|
49 |
+
},
|
50 |
+
{
|
51 |
+
"File Name": "DARAGA-ALCALA-0017",
|
52 |
+
"General Information": {
|
53 |
+
"ARP No.": "2017-04-0000-00009",
|
54 |
+
"Owner": "Demetrio Madrona",
|
55 |
+
"Address": "",
|
56 |
+
"Tel No.": "",
|
57 |
+
"Administrator/Beneficial User": "",
|
58 |
+
"Address:": "",
|
59 |
+
"Tel No.:": "",
|
60 |
+
"PIN": "0310 400 301008-1001",
|
61 |
+
"TIN_1": "",
|
62 |
+
"TIN_2": ""
|
63 |
+
},
|
64 |
+
"Building Location": {
|
65 |
+
"No. / Street": "",
|
66 |
+
"Brgy/District": "Alcala Daraga",
|
67 |
+
"Municipality": "Albay",
|
68 |
+
"Province/city": "Albay"
|
69 |
+
},
|
70 |
+
"Land Reference": {
|
71 |
+
"Owner": "",
|
72 |
+
"OCT/TCT/CLOA NO.": "",
|
73 |
+
"Lot No.": "",
|
74 |
+
"Survey No.": "300",
|
75 |
+
"Blk No.": "",
|
76 |
+
"TD/ARP No.:": "",
|
77 |
+
"Area": "6 SQ.M."
|
78 |
+
},
|
79 |
+
"Property Appraisal": {
|
80 |
+
"Kind of Bldg": "",
|
81 |
+
"Structural Type": "V",
|
82 |
+
"Bldg. Permit No.": "",
|
83 |
+
"Date Issued": "",
|
84 |
+
"Condominium Certificate of Title(CCT)": "",
|
85 |
+
"Certificate of Completion Issued on": "",
|
86 |
+
"Certificate of Occupancy Issued on": "",
|
87 |
+
"Date Constructed/Completed": "",
|
88 |
+
"Date Occupied": "",
|
89 |
+
"Bldg. Age": "",
|
90 |
+
"No. of Storeys": "",
|
91 |
+
"Area of 1st Flr": "",
|
92 |
+
"Area of 2nd Flr": "",
|
93 |
+
"Area of 3rd Flr": "",
|
94 |
+
"Area of 4th Flr": "",
|
95 |
+
"Total Floor Area": "6 SQ.M."
|
96 |
+
}
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"File Name": "DARAGA-ALCALA-0019",
|
100 |
+
"General Information": {
|
101 |
+
"ARP No.": "01 04-00 000 0",
|
102 |
+
"Owner": "DEMETRIO MADRONA",
|
103 |
+
"Address": "",
|
104 |
+
"Tel No.": "",
|
105 |
+
"Administrator/Beneficial User": "",
|
106 |
+
"Address:": "",
|
107 |
+
"Tel No.:": "",
|
108 |
+
"PIN": "03 400301008 -1002",
|
109 |
+
"TIN_1": "",
|
110 |
+
"TIN_2": ""
|
111 |
+
},
|
112 |
+
"Building Location": {
|
113 |
+
"No. / Street": "",
|
114 |
+
"Brgy/District": "ALCALA",
|
115 |
+
"Municipality": "DARAGA",
|
116 |
+
"Province/city": "ALBAY"
|
117 |
+
},
|
118 |
+
"Land Reference": {
|
119 |
+
"Owner": "",
|
120 |
+
"OCT/TCT/CLOA NO.": "",
|
121 |
+
"Lot No.": "",
|
122 |
+
"Survey No.": "",
|
123 |
+
"Blk No.": "",
|
124 |
+
"TD/ARP No.:": "",
|
125 |
+
"Area": "6 SQ.M."
|
126 |
+
},
|
127 |
+
"Property Appraisal": {
|
128 |
+
"Kind of Bldg": "",
|
129 |
+
"Structural Type": "IV",
|
130 |
+
"Bldg. Permit No.": "",
|
131 |
+
"Date Issued": "",
|
132 |
+
"Condominium Certificate of Title(CCT)": "",
|
133 |
+
"Certificate of Completion Issued on": "",
|
134 |
+
"Certificate of Occupancy Issued on": "",
|
135 |
+
"Date Constructed/Completed": "",
|
136 |
+
"Date Occupied": "",
|
137 |
+
"Bldg. Age": "",
|
138 |
+
"No. of Storeys": "",
|
139 |
+
"Area of 1st Flr": "",
|
140 |
+
"Area of 2nd Flr": "",
|
141 |
+
"Area of 3rd Flr": "",
|
142 |
+
"Area of 4th Flr": "",
|
143 |
+
"Total Floor Area": "6 SQ.M."
|
144 |
+
}
|
145 |
+
}
|
146 |
+
]
|
requirements.txt
ADDED
Binary file (9.51 kB). View file
|
|
settings.py
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

# OpenAI variables
# SECURITY: the API key is read from the OPENAI_API_KEY environment variable
# instead of being committed to the repository. The key that was previously
# hard-coded here is public and must be revoked. An empty string is used when
# the variable is unset.
gpt_api_key = os.environ.get("OPENAI_API_KEY", "")
gpt_model = "gpt-3.5-turbo"

# Google variables
project_id = "advance-river-381411"
project_location = "us"
processor_id = "31bc9a6106cb3cac"

# Company-specific variables
# NOTE(review): presumably placeholder strings to strip from extracted values;
# confirm against the code that consumes char_remove.
char_remove = ["N/A", "(Use additional sheet if necessary)", "(not provided)", "Not specified", "-||-", "0 sq.m."]
|
12 |
+
|
13 |
+
#Company Documents
|
14 |
+
RPFAAP2 = '''
|
15 |
+
Desired Format:
|
16 |
+
Additional Items: -||-
|
17 |
+
Unit Construction Cost (P): <Monetary Value>
|
18 |
+
Sub-Total (P): <Monetary Value>
|
19 |
+
Depreciation Rate: -||-
|
20 |
+
Depreciation Cost (P): <Monetary Value>
|
21 |
+
Cost of Additional Items: -||-
|
22 |
+
Total Construction Cost: -||-
|
23 |
+
Total Percentage Depreciation: -||-
|
24 |
+
Market Value (P): <Monetary Value>
|
25 |
+
Actual Use: -||-
|
26 |
+
Market Value (P): <Monetary Value>
|
27 |
+
Assessment Level: -||-
|
28 |
+
Assessed Value (P): <Monetary Value>
|
29 |
+
Effectivity of Assessment/Reassessment: -||-
|
30 |
+
Memoranda: -||-
|
31 |
+
Date of Entry in the Record of Assessment: -||-
|
32 |
+
Name: -||-
|
33 |
+
PIN: <Numerical Value Only, Replace Slashes with the number 1>
|
34 |
+
ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
|
35 |
+
TD No.: <Numerical Value Only, Replace Slashes with the number 1>
|
36 |
+
Total Assessed Value: -||-
|
37 |
+
Previous Owner: <Person's Name>
|
38 |
+
Effectivity of Assessment: -||-
|
39 |
+
Record Person: <Person's Name>
|
40 |
+
Date: -||-
|
41 |
+
'''
|
42 |
+
RPFAAP1 = '''
|
43 |
+
Desired Format:
|
44 |
+
ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
|
45 |
+
Owner: <Person's Name>
|
46 |
+
Address: -||-
|
47 |
+
Tel No.: -||-
|
48 |
+
Administrator/Beneficial User: -||-
|
49 |
+
Address: -||-
|
50 |
+
Tel No.: -||-
|
51 |
+
PIN: <Numerical Value Only, Replace Slashes with the number 1>
|
52 |
+
TIN_1: <Numerical Value Only, Replace Slashes with the number 1>
|
53 |
+
TIN_2: <Numerical Value Only, Replace Slashes with the number 1>
|
54 |
+
'''
|
55 |
+
# RPFAAP1 = '''
|
56 |
+
# Desired Format:
|
57 |
+
# ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
|
58 |
+
# Owner: <Person's Name>
|
59 |
+
# Address: -||-
|
60 |
+
# Tel No.: -||-
|
61 |
+
# Administrator/Beneficial User: -||-
|
62 |
+
# Address: -||-
|
63 |
+
# Tel No.: -||-
|
64 |
+
# PIN: <Numerical Value Only, Replace Slashes with the number 1>
|
65 |
+
# TIN_1: <Numerical Value Only, Replace Slashes with the number 1>
|
66 |
+
# TIN_2: <Numerical Value Only, Replace Slashes with the number 1>
|
67 |
+
# No. / Street: -||-
|
68 |
+
# Brgy/District: -||-
|
69 |
+
# Municipality: -||-
|
70 |
+
# Province/city: -||-
|
71 |
+
# Owner: <Person's Name>
|
72 |
+
# OCT/TCT/CLOA No.: -||-
|
73 |
+
# Lot No.: -||-
|
74 |
+
# Survey No.: -||-
|
75 |
+
# Blk No.: -||-
|
76 |
+
# TD/ARP No.: <Numerical Value Only, Replace Slashes with the number 1>
|
77 |
+
# Area: <Area in SQ.M.>
|
78 |
+
# Kind of Bldg.: -||-
|
79 |
+
# Structural Type: -||-
|
80 |
+
# Bldg. Permit No.: -||-
|
81 |
+
# Date Issued: -||-
|
82 |
+
# Condominium Certificate of Title(CCT): -||-
|
83 |
+
# Certificate of Completion Issued on: -||-
|
84 |
+
# Certificate of Occupancy Issued on: -||-
|
85 |
+
# Date Constructed/Completed: -||-
|
86 |
+
# Date Occupied: -||-
|
87 |
+
# Bldg. Age: -||-
|
88 |
+
# No. of Storeys: -||-
|
89 |
+
# Area of 1st Flr: <Numerical Value in SQ.M.>
|
90 |
+
# Area of 2nd Flr: <Numerical Value in SQ.M.>
|
91 |
+
# Area of 3rd Flr: <Numerical Value in SQ.M.>
|
92 |
+
# Area of 4th Flr: <Numerical Value in SQ.M.>
|
93 |
+
# Total Floor Area: <Numerical Value in SQ.M.>
|
94 |
+
# '''
|