Spaces:
Sleeping
Sleeping
import re | |
def check_integer(string):
    """Return True if ``string`` contains at least one digit character.

    Despite the name, this does not test whether the whole string is an
    integer: a single digit anywhere is enough (``"ab1"`` -> True).
    An empty string yields False.
    """
    # An all-digit string trivially contains a digit, so the separate
    # ``string.isdigit()`` pre-check the function used to do was redundant.
    return any(char.isdigit() for char in string)
def check_alpha(string):
    """Return True when every character is an ASCII letter or a space.

    Only ``a-z``, ``A-Z`` and ``' '`` are accepted; digits, punctuation and
    non-ASCII letters make the result False. An empty string yields True
    because there is no offending character.
    """
    return all(
        'a' <= char <= 'z' or 'A' <= char <= 'Z' or char == ' '
        for char in string
    )
def is_chinese_name(text):
    """Heuristically decide whether ``text`` looks like a name line.

    The text qualifies when it is at most 40 characters long and its first
    eight characters contain at least three ASCII upper-case letters, at
    least two ASCII lower-case letters and at least one space
    (e.g. ``"TAM King Man"``).

    NOTE(review): the thresholds presumably target romanised names printed
    as "SURNAME Given Name" -- confirm against the caller.
    """
    if len(text) > 40:
        return False

    # The counts can only grow as a prefix gets longer, so checking the
    # longest prefix (8 characters) is equivalent to checking every prefix
    # of length 1..8 separately.
    head = text[:8]
    upper_count = sum('A' <= char <= 'Z' for char in head)
    lower_count = sum('a' <= char <= 'z' for char in head)
    space_count = head.count(' ')
    return upper_count >= 3 and lower_count >= 2 and space_count >= 1
def seperate_name(text):
    """Return ``text`` lower-cased with every space character removed.

    For example ``"TAM King Man"`` becomes ``"tamkingman"``. The
    (misspelled) function name is kept so existing callers keep working.
    """
    return text.replace(' ', '').lower()
def validate_hkid(hkid):  # omit parentheses
    """Check an HKID number against its check digit.

    Parentheses are removed first, so ``"G123456(A)"`` and ``"G123456A"``
    are treated alike. The remaining text must be one or two upper-case
    letters, six digits and a check character (``0-9`` or ``A``).

    Each symbol is turned into a number (digits 0-9, letters A-Z as 10-35,
    a missing first letter as 36) and multiplied by the weights 9 down to 1;
    the ID is valid when the weighted sum is divisible by 11.

    Returns True for a valid ID, False for an invalid or unparseable one.
    """
    bare = hkid.replace('(', '').replace(')', '')
    parsed = re.match('^([A-Z])?([A-Z])([0-9]{6})([0-9A])$', bare)
    if parsed is None:
        return False

    alphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'

    # Flatten the regex groups into nine symbols; an absent first letter is
    # represented by None.
    symbols = []
    for group in parsed.groups():
        if group:
            symbols.extend(group)
        else:
            symbols.append(None)

    total = 0
    for weight, symbol in zip(range(9, 0, -1), symbols):
        # None (no first letter) is valued one past the last alphabet index.
        value = len(alphabet) if symbol is None else alphabet.index(symbol)
        total += value * weight
    return total % 11 == 0
def format_HKID(hkid):
    """Return ``hkid`` in canonical form with the check digit in parentheses.

    Any parentheses already present are dropped, the text is parsed as one
    or two upper-case letters, six digits and a check character, and the
    result is rebuilt as ``<letters><digits>(<check>)``, e.g.
    ``"G123456A"`` -> ``"G123456(A)"`` and ``"AB9876543"`` -> ``"AB987654(3)"``.

    Raises:
        ValueError: if ``hkid`` does not have the HKID shape. (Previously
            this surfaced as an AttributeError on ``None``.)
    """
    bare = hkid.replace('(', '').replace(')', '')
    match = re.match('^([A-Z])?([A-Z])([0-9]{6})([0-9A])$', bare)
    if match is None:
        raise ValueError(f'not a recognisable HKID: {hkid!r}')

    # Building the result from the named pieces (instead of counting
    # character positions) keeps one- and two-letter prefixes both correct.
    first_letter, second_letter, digits, check = match.groups()
    return f"{first_letter or ''}{second_letter}{digits}({check})"
def format_issuedate(issuedate):
    """Return ``issuedate`` with every ``(``, ``)`` and ``C`` removed.

    For example ``"(06-96)"`` and ``"C06-96)"`` both become ``"06-96"``.
    """
    cleaned = issuedate
    for unwanted in ('(', ')', 'C'):
        cleaned = cleaned.replace(unwanted, '')
    return cleaned
def is_string_integer(string):
    """Return True if ``int(string)`` succeeds, False if it raises ValueError.

    Note that ``int`` tolerates surrounding whitespace and a leading sign,
    so ``" 42 "`` and ``"-5"`` both count as integers here.
    """
    try:
        int(string)
    except ValueError:
        # The text could not be parsed as an integer.
        outcome = False
    else:
        outcome = True
    return outcome
def check_issuedate(text):
    """Return True if ``text`` is an issue date of the form ``NN-NN``.

    The date may be bare (``"06-96"``) or carry a leading ``(`` or ``C`` and
    a trailing ``)`` (``"(06-96)"``, ``"C06-96)"``). After those wrappers are
    removed, exactly two ASCII digits, a hyphen and two ASCII digits must
    remain.

    NOTE(review): a leading ``C`` is presumably an OCR misread of ``(`` --
    confirm against the caller.
    """
    # A bare date has 5 characters, a fully wrapped one has 7. The original
    # test joined these bounds with `and`, which can never be true.
    if len(text) < 5 or len(text) > 7:
        return False

    if text[0] == '(':
        text = text.replace('(', '')
    elif text[0] == 'C':
        text = text.replace('C', '')
    # `text` may have become empty above (e.g. "((((("), hence the guard.
    if text and text[-1] == ')':
        text = text.replace(')', '')

    # Strict digit match: int()-based checks would also accept signs and
    # whitespace such as "+1-23".
    return re.fullmatch(r'[0-9]{2}-[0-9]{2}', text) is not None
def print_info(name, valid_hkid, hkid, issuedate):
    """Print the extracted card details to standard output, one per line.

    Three lines are written: the name, the HKID together with its validity
    flag, and the date of issue.
    """
    report = (
        f'Name: {name}',
        f'HKID: {hkid} and validity: {valid_hkid}',
        f'Date of issue: {issuedate}',
    )
    for line in report:
        print(line)
def is_comma_present(string):
    """Tell whether ``string`` contains at least one comma."""
    comma = ','
    return comma in string
def longest_common_subsequence(s1, s2):
    """Return one longest common subsequence of ``s1`` and ``s2`` as a string.

    A dynamic-programming table of subsequence lengths is filled first and
    then walked backwards from the bottom-right corner to recover the
    characters. When both neighbouring cells are equally good, the walk
    moves left (towards a shorter ``s2`` prefix).
    """
    rows, cols = len(s1), len(s2)

    # table[r][c] holds the LCS length of s1[:r] and s2[:c].
    table = [[0 for _ in range(cols + 1)] for _ in range(rows + 1)]
    for r, left in enumerate(s1, start=1):
        for c, right in enumerate(s2, start=1):
            if left == right:
                table[r][c] = table[r - 1][c - 1] + 1
            else:
                table[r][c] = max(table[r - 1][c], table[r][c - 1])

    # Walk back through the table, collecting matched characters in
    # reverse order.
    picked = []
    r, c = rows, cols
    while r > 0 and c > 0:
        if s1[r - 1] == s2[c - 1]:
            picked.append(s1[r - 1])
            r, c = r - 1, c - 1
        elif table[r - 1][c] > table[r][c - 1]:
            r -= 1
        else:
            c -= 1

    return ''.join(reversed(picked))
def combine_info(info1, info2):
    """Merge two ``[name, validity, hkid, issuedate]`` readings into one.

    Both inputs are indexable sequences whose validity entry is the string
    ``'True'`` or ``'False'``. The result always has four entries, in the
    same order, so it can be passed on as ``print_info(*result)``.

    * name: the shared value if both agree; the non-empty one if the other
      is ``''``; otherwise the longest common subsequence of the two.
    * validity / hkid: taken from whichever reading is valid when exactly
      one is; taken from ``info1`` when both are valid and agree on the
      HKID; ``'False'`` / ``'Suspicous HKID'`` in every other case.
    * issuedate: the shared value, or ``'Unmatched issuedate'``.

    Both inputs are also printed to standard output.
    """
    print(info1)
    print(info2)

    combined_info = []

    # --- name ---
    if info1[0] == info2[0]:
        combined_info.append(info1[0])
    elif info1[0] == '':
        combined_info.append(info2[0])
    elif info2[0] == '':
        combined_info.append(info1[0])
    else:
        combined_info.append(longest_common_subsequence(info1[0], info2[0]))

    # --- validity flag and HKID ---
    if info1[1] == 'True' and info2[1] == 'False':
        combined_info.extend([info1[1], info1[2]])
    elif info1[1] == 'False' and info2[1] == 'True':
        combined_info.extend([info2[1], info2[2]])
    elif info1[1] == 'True' and info2[1] == 'True' and info1[2] == info2[2]:
        combined_info.extend([info1[1], info1[2]])
    else:
        # Catch-all: both valid but disagreeing, both invalid, or an
        # unexpected flag value. Without it the result would be missing two
        # entries and callers unpacking four values would break.
        combined_info.extend(['False', 'Suspicous HKID'])

    # --- issue date ---
    if info1[3] == info2[3]:
        combined_info.append(info1[3])
    else:
        combined_info.append('Unmatched issuedate')

    return combined_info
# info1 = ['', 'True', 'Z683365(5)', '06-96'] | |
# info2 = ['lok wing', 'False', 'Z68336505)', '06-96'] | |
# info = combine_info(info1, info2) | |
# print_info(*info) | |
# text = 'TAMKing Man' | |
# if is_comma_present(text): | |
# text = text.replace(',', '') | |
# if not check_integer(text): | |
# if check_alpha(text) and is_chinese_name(text): | |
# name = seperate_name(text) |