Spaces:

OneFi
/

hf-similarity-check

Sleeping

hf-similarity-check / get_chinese_name.py

Upload 2 files

149bc14 almost 2 years ago

1.15 kB

	from cnocr import CnOcr
	import openai
	from dotenv import load_dotenv
	import os
	import json

	def _parse_json_reply(reply):
	    """Decode the model reply as JSON, tolerating text wrapped around the object.

	    The reply is first parsed as-is. If that fails, the span from the first
	    '{' to the last '}' is parsed instead, which covers replies wrapped in a
	    Markdown code fence or surrounded by explanatory prose.

	    Raises:
	        json.JSONDecodeError: if neither attempt yields valid JSON.
	    """
	    try:
	        return json.loads(reply)
	    except json.JSONDecodeError:
	        start = reply.find('{')
	        end = reply.rfind('}')
	        if start == -1 or end <= start:
	            # Nothing that looks like a JSON object; surface the original error.
	            raise
	        return json.loads(reply[start:end + 1])


	def get_chiname(path):
	    """Extract the Chinese name from an ID-card image via OCR and a chat model.

	    The image is read with CnOcr, the recognised text fragments are sent to
	    the OpenAI chat completion endpoint, and the model's reply is decoded
	    as JSON.

	    Args:
	        path: Value passed straight to ``CnOcr.ocr`` (presumably an image
	            file path; confirm against the caller).

	    Returns:
	        The decoded JSON reply. The prompt asks for a dictionary holding the
	        Chinese name, but the shape and keys are chosen by the model and are
	        not validated here.

	    Raises:
	        json.JSONDecodeError: if the model reply contains no parsable JSON.
	    """
	    ocr = CnOcr(rec_model_name='chinese_cht_PP-OCRv3')
	    out = ocr.ocr(path)

	    # NOTE(review): this dumps the raw OCR result to stdout, which may include
	    # personal data read from the card. Kept so existing output is unchanged.
	    print(out)

	    load_dotenv()
	    openai.api_key = os.environ.get("data-extraction-api")

	    # Drop fragments that are only a lone space or comma.
	    ignored = (' ', ',')
	    texts = [item['text'] for item in out if item['text'] not in ignored]

	    # NOTE: ``openai.ChatCompletion`` is the legacy (pre-1.0) client interface.
	    completion = openai.ChatCompletion.create(
	        model="gpt-3.5-turbo",
	        temperature=0,
	        messages=[
	            {
	                "role": "system",
	                "content": "You are an AI assistant for extracting Chinese name (usually in length of three) from Hong Kong ID card.",
	            },
	            {
	                "role": "user",
	                # Adjacent literals instead of a backslash continuation, so
	                # source indentation does not leak into the prompt text.
	                "content": (
	                    f"Extract data from the following set of text: {texts}. "
	                    "You only need to return a dictonary with Chinese name in Chinese. Use double quote."
	                ),
	            },
	        ],
	    )

	    reply = completion['choices'][0]['message']['content']
	    return _parse_json_reply(reply)