Spaces:
Sleeping
Sleeping
from cnocr import CnOcr | |
import openai | |
from dotenv import load_dotenv | |
import os | |
import json | |
def _build_messages(texts):
    """Return the chat messages asking the model to extract HKID fields from *texts*."""
    # Adjacent string literals are used instead of backslash continuations so
    # that source indentation can never leak into the prompt text.
    system_prompt = (
        "You are an AI assistant for extracting data from HKID card with following information "
        "(name, HKID number, date of issue) from HKID card. Uppercase and lowercase letters are the same. Store the results in "
        "dictionary format"
    )
    # `texts` is interpolated with its list repr, exactly as the OCR fragments
    # were collected.
    user_prompt = (
        f"Extract data from the following set of text: {texts}. "
        "You have three types of data to extract. "
        "1. id card holder full name (it noramlly is a chinese name, including surname and family "
        "name in English spelling, and it may be separate in different fields in the data set for surname and family name "
        "sometimes) "
        "2. issue date (should be a date with month and day, e.g. 19-97 is the required format, but 26-11-18 is not "
        "because date of issue of have 5 characters) Only choose valid format!!! "
        "3. HKID number (The standard format of HKID number is @123456(#) e.g. A123456(7) is a valid HKID number. "
        "(a) @ represents any one or two capital letters of the alphabet. "
        "(b) # is the check digit which has 11 possible values from 0 to 9 and A.) "
        "Remember to include the check digit with () "
        "Only reply a dictionary. No need to add other words or explanation. Use double quote for dictionary."
    )
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]


def _strip_code_fence(reply):
    """Return *reply* without a surrounding Markdown code fence, if it has one."""
    text = reply.strip()
    if len(text) >= 6 and text.startswith("```") and text.endswith("```"):
        text = text[3:-3]
        # Drop an optional language tag (e.g. "json") on the opening fence
        # line, but keep the line when it already starts the JSON payload.
        first_line, newline, remainder = text.partition("\n")
        if newline and not first_line.strip().startswith(("{", "[")):
            text = remainder
    return text.strip()


def model0(path):
    """Extract HKID card details from an image via OCR and a chat model.

    The image is read with CnOcr (``en_PP-OCRv3`` recognition model), the
    recognised text fragments are sent to OpenAI's ``gpt-3.5-turbo`` chat
    model at temperature 0, and the model's JSON reply is parsed. The raw OCR
    output, the raw reply and the parsed result are printed along the way.

    Args:
        path: Image location, passed unchanged to ``CnOcr.ocr``.

    Returns:
        The object decoded from the model's JSON reply. The prompt asks for a
        dictionary with the holder name, HKID number and issue date, but the
        key names are chosen by the model and are not enforced here.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON, even after a
            surrounding Markdown code fence has been removed.
    """
    ocr = CnOcr(rec_model_name='en_PP-OCRv3')
    out = ocr.ocr(path)
    print(out)

    load_dotenv()
    openai.api_key = os.environ.get("data-extraction-api")

    # Fragments that are only a blank or a comma carry no information.
    ignored_fragments = (' ', ',')
    texts = [item['text'] for item in out if item['text'] not in ignored_fragments]

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        temperature=0,
        messages=_build_messages(texts),
    )
    data = completion['choices'][0]['message']['content']
    print(data)

    # Chat models sometimes wrap JSON in a ``` fence despite being told to
    # reply with the dictionary only; tolerate that instead of crashing.
    id_data = json.loads(_strip_code_fence(data))
    print(id_data)

    # NOTE: an earlier draft planned to return
    # [name, valid_hkid, hkid, issuedate]; the parsed reply is returned as-is
    # so the extracted data is no longer discarded.
    return id_data
# Module-level invocation: this runs the extraction on one fixed image every
# time the file is executed or imported.
# NOTE(review): consider an `if __name__ == "__main__":` guard if this module
# is ever imported from elsewhere.
model0(path='dontTouchMe/IMG_4499.jpg')