Spaces:
Sleeping
Sleeping
Upload model0.py
Browse files
model0.py
CHANGED
@@ -3,6 +3,7 @@ import openai
|
|
3 |
from dotenv import load_dotenv
|
4 |
import os
|
5 |
import json
|
|
|
6 |
|
7 |
def model0(path):
|
8 |
ocr = CnOcr(rec_model_name='en_PP-OCRv3')
|
@@ -19,25 +20,29 @@ def model0(path):
|
|
19 |
if item['text'] not in invalid_list:
|
20 |
data_set_1.append(item['text'])
|
21 |
|
|
|
|
|
22 |
completion = openai.ChatCompletion.create(
|
23 |
model = "gpt-3.5-turbo",
|
24 |
temperature = 0,
|
25 |
messages = [
|
26 |
{"role": "system", "content": "You are an AI assistant for extracting data from HKID card with following information \
|
27 |
-
(name,
|
28 |
dictionary format"},
|
29 |
{"role": "user", "content": f"Extract data from the following set of text: {data_set_1}. \
|
30 |
You have three types of data to extract. \
|
31 |
1. id card holder full name (it noramlly is a chinese name, including surname and family \
|
32 |
name in English spelling, and it may be separate in different fields in the data set for surname and family name \
|
33 |
sometimes) \
|
34 |
-
2.
|
35 |
-
because date of
|
36 |
-
3.
|
|
|
37 |
(a) @ represents any one or two capital letters of the alphabet. \
|
38 |
(b) # is the check digit which has 11 possible values from 0 to 9 and A.) \
|
39 |
Remember to include the check digit with () \
|
40 |
Only reply a dictionary. No need to add other words or explanation. Use double quote for dictionary."},
|
|
|
41 |
]
|
42 |
)
|
43 |
|
@@ -46,9 +51,17 @@ def model0(path):
|
|
46 |
print(data)
|
47 |
|
48 |
id_data = json.loads(data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
print(id_data)
|
51 |
-
return
|
52 |
# return [name, valid_hkid, hkid, issuedate]
|
53 |
-
|
54 |
-
model0('dontTouchMe/IMG_4499.jpg')
|
|
|
3 |
from dotenv import load_dotenv
|
4 |
import os
|
5 |
import json
|
6 |
+
import checkTool
|
7 |
|
8 |
def model0(path):
|
9 |
ocr = CnOcr(rec_model_name='en_PP-OCRv3')
|
|
|
20 |
if item['text'] not in invalid_list:
|
21 |
data_set_1.append(item['text'])
|
22 |
|
23 |
+
print(f'All data here: {data_set_1}')
|
24 |
+
|
25 |
completion = openai.ChatCompletion.create(
|
26 |
model = "gpt-3.5-turbo",
|
27 |
temperature = 0,
|
28 |
messages = [
|
29 |
{"role": "system", "content": "You are an AI assistant for extracting data from HKID card with following information \
|
30 |
+
(name, date of birth, date of issue, HKID number) from HKID card. Uppercase and lowercase letters are the same. Store the results in \
|
31 |
dictionary format"},
|
32 |
{"role": "user", "content": f"Extract data from the following set of text: {data_set_1}. \
|
33 |
You have three types of data to extract. \
|
34 |
1. id card holder full name (it noramlly is a chinese name, including surname and family \
|
35 |
name in English spelling, and it may be separate in different fields in the data set for surname and family name \
|
36 |
sometimes) \
|
37 |
+
2. date of birth (should be a date with year, month and day, e.g. 23-02-2003 is the required format, but 26-11 is not \
|
38 |
+
because date of birth should have 10 characters) Only choose valid format!!!\
|
39 |
+
3. date of issue (a string with format xx-xx) \
|
40 |
+
4. HKID number (The standard format of HKID number is @123456(#) e.g. A123456(7) is a valid HKID number. \
|
41 |
(a) @ represents any one or two capital letters of the alphabet. \
|
42 |
(b) # is the check digit which has 11 possible values from 0 to 9 and A.) \
|
43 |
Remember to include the check digit with () \
|
44 |
Only reply a dictionary. No need to add other words or explanation. Use double quote for dictionary."},
|
45 |
+
|
46 |
]
|
47 |
)
|
48 |
|
|
|
51 |
print(data)
|
52 |
|
53 |
id_data = json.loads(data)
|
54 |
+
|
55 |
+
name = id_data["name"]
|
56 |
+
dateofbirth = id_data["date of birth"]
|
57 |
+
issuedate = id_data["date of issue"]
|
58 |
+
hkid = id_data["HKID number"]
|
59 |
+
if checkTool.validate_hkid(hkid=hkid):
|
60 |
+
valid_hkid = 'True'
|
61 |
+
else:
|
62 |
+
valid_hkid = 'False'
|
63 |
+
name = checkTool.seperate_name(name)
|
64 |
|
65 |
print(id_data)
|
66 |
+
return [name, valid_hkid, hkid, issuedate, dateofbirth]
|
67 |
# return [name, valid_hkid, hkid, issuedate]
|
|
|
|