Spaces:
Sleeping
Sleeping
from cnocr import CnOcr | |
import pandas as pd | |
def check_telecode(input_string):
    """Return True if *input_string* looks like two or three 4-digit telecodes.

    A candidate is accepted only when it is exactly 8 or 12 characters long
    and every character is an ASCII digit.

    The digit test is explicit rather than relying on ``int(input_string)``
    because ``int`` also accepts signs, surrounding whitespace, underscores
    and non-ASCII digits (for example ``"+1234567"`` or ``"1_234_56"``).
    Such strings cannot be sliced into valid 4-digit codes.

    Args:
        input_string: Text to test, already stripped of spaces by the caller.

    Returns:
        bool: True for an 8- or 12-character all-digit string, else False.
    """
    if len(input_string) not in (8, 12):
        return False
    return input_string.isascii() and input_string.isdigit()
def extract_integers(input_string):
    """Split a telecode string into its consecutive 4-character codes.

    The pieces are returned as substrings, not ``int`` objects, so any
    leading zeros in a code are kept.

    The result is always a 3-tuple so callers can unpack it uniformly:

    * 12 characters -> ``(w1, w2, w3)``
    * 8 characters  -> ``(w1, w2, None)``
    * anything else -> ``(None, None, None)``

    Previously the 8-character case returned only two values, which made
    ``w1, w2, w3 = extract_integers(text)`` raise ``ValueError``.

    Args:
        input_string: The candidate telecode text.

    Returns:
        tuple: Three items, each a 4-character ``str`` or ``None``.
    """
    length = len(input_string)
    if length == 12:
        return input_string[:4], input_string[4:8], input_string[8:]
    if length == 8:
        # Two-code input: pad the third slot so the arity stays constant.
        return input_string[:4], input_string[4:], None
    return None, None, None
def get_chinese_name(path):
    """OCR an image and translate the first telecode line found into words.

    Each recognised text line has its spaces removed and is tested with
    ``check_telecode``. For the first line whose codes are all present in
    ``hkTelecode.csv`` (columns ``code`` and ``word``), the matching
    ``word`` values are returned in order.

    Lines that look like a telecode but contain a code missing from the
    table are skipped rather than raising ``IndexError``. An 8- or 12-digit
    number that is not a telecode therefore no longer aborts the scan.

    Args:
        path: Image input handed unchanged to ``CnOcr.ocr``.

    Returns:
        list: The ``word`` entries for the decoded codes, or ``[]`` when no
        recognised line yields a complete set of known codes.
    """
    ocr = CnOcr(rec_model_name='en_PP-OCRv3')
    # Alternative recognition model left by the original author:
    # 'densenet_lite_136-fc'
    out = ocr.ocr(path)
    # 'code' is read as str so it compares equal to the sliced digit strings.
    df = pd.read_csv('hkTelecode.csv', dtype={'code': str}, index_col=False)

    for data in out:
        text = data['text'].replace(' ', '')
        if not check_telecode(text):
            continue

        # Works whether extract_integers yields two codes or pads the third
        # slot with None, so an 8-digit line no longer fails to unpack.
        codes = [code for code in extract_integers(text) if code is not None]

        chinese_name = []
        for code in codes:
            print(code)  # debug trace kept from the original implementation
            matches = df['word'][df['code'] == code]
            if matches.empty:
                # Unknown code: treat this line as a false positive.
                break
            chinese_name.append(matches.iloc[0])
        else:
            return chinese_name

    return []
# Run the decoder on a hard-coded image path and print the resulting list.
sample_image = 'dontTouchMe/IMG_4495.jpg'
print(get_chinese_name(sample_image))