Spaces:
Sleeping
Sleeping
Mitul Mohammad Abdullah Al Mukit
commited on
Commit
•
1f72938
1
Parent(s):
ac8d65b
first commit
Browse files- .gitignore +5 -0
- Visualization_utilities.py +189 -0
- __pycache__/Visualization_utilities.cpython-311.pyc +0 -0
- __pycache__/Visualization_utilities.cpython-39.pyc +0 -0
- __pycache__/checkTool.cpython-311.pyc +0 -0
- __pycache__/checkTool.cpython-39.pyc +0 -0
- __pycache__/data_encryption.cpython-311.pyc +0 -0
- __pycache__/data_encryption.cpython-39.pyc +0 -0
- __pycache__/demo.cpython-311.pyc +0 -0
- __pycache__/demo.cpython-39.pyc +0 -0
- __pycache__/extract_pdf.cpython-311.pyc +0 -0
- __pycache__/extract_pdf.cpython-39.pyc +0 -0
- __pycache__/imageSegmentation.cpython-311.pyc +0 -0
- __pycache__/imageSegmentation.cpython-39.pyc +0 -0
- __pycache__/model1.cpython-311.pyc +0 -0
- __pycache__/model1.cpython-39.pyc +0 -0
- __pycache__/model2.cpython-311.pyc +0 -0
- __pycache__/model2.cpython-39.pyc +0 -0
- __pycache__/similarity_check.cpython-311.pyc +0 -0
- __pycache__/similarity_check.cpython-39.pyc +0 -0
- __pycache__/webapp.cpython-311.pyc +0 -0
- blaze_face_short_range.tflite +3 -0
- checkTool.py +227 -0
- data1.txt +1 -0
- data_encryption.py +12 -0
- demo.py +185 -0
- extract_pdf.py +139 -0
- extraction_data.py +96 -0
- imageSegmentation.py +60 -0
- model1.py +46 -0
- model2.py +46 -0
- pubkey.pem +13 -0
- request_json/__pycache__/sbt_request_generator.cpython-311.pyc +0 -0
- request_json/__pycache__/sbt_request_generator.cpython-39.pyc +0 -0
- request_json/request_legalDocument.json +75 -0
- request_json/sbt_request_generator.py +108 -0
- requirements.txt +15 -0
- sbt/deployment.py +3 -0
- sbt_request.txt +22 -0
- similarity_check.py +89 -0
- test.py +3 -0
- text_reader_v2.py +18 -0
- webapp.py +209 -0
.gitignore
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
image/*
|
2 |
+
saved/*
|
3 |
+
image
|
4 |
+
saved
|
5 |
+
.DS_Store
|
Visualization_utilities.py
ADDED
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2
|
2 |
+
import mediapipe as mp
|
3 |
+
from mediapipe import solutions
|
4 |
+
from mediapipe.framework.formats import landmark_pb2
|
5 |
+
import numpy as np
|
6 |
+
import math
|
7 |
+
|
8 |
+
# visualization libraries
|
9 |
+
import matplotlib.pyplot as plt
|
10 |
+
import matplotlib.animation as animation
|
11 |
+
from matplotlib import style
|
12 |
+
|
13 |
+
def draw_eyes_on_image(rgb_image, detection_result):
|
14 |
+
|
15 |
+
# return rgb_image, 0, 0
|
16 |
+
|
17 |
+
# canonical_face_model_uv_visualization in the below link
|
18 |
+
# https://github.com/google/mediapipe/blob/a908d668c730da128dfa8d9f6bd25d519d006692/mediapipe/modules/face_geometry/data/canonical_face_model_uv_visualization.png
|
19 |
+
left_eyes_bottom_list = [33, 7, 163, 144, 145, 153, 154, 155, 133]
|
20 |
+
left_eyes_top_list = [246, 161, 160, 159, 158, 157, 173]
|
21 |
+
right_eyes_bottom_list = [362, 382, 381, 380, 374, 373, 390, 249, 263]
|
22 |
+
right_eyes_top_list = [398, 384, 385, 386, 387, 388, 466]
|
23 |
+
|
24 |
+
face_landmarks_list = detection_result.face_landmarks
|
25 |
+
annotated_image = np.copy(rgb_image)
|
26 |
+
|
27 |
+
# We resize image to 640 * 360
|
28 |
+
height, width, channels = rgb_image.shape
|
29 |
+
|
30 |
+
# Loop through the detected faces to visualize. Actually, if we detect more than two faces, we will require user closer to the camera
|
31 |
+
for idx in range(len(face_landmarks_list)):
|
32 |
+
face_landmarks = face_landmarks_list[idx]
|
33 |
+
|
34 |
+
mlist = []
|
35 |
+
for landmark in face_landmarks:
|
36 |
+
mlist.append([int(landmark.x * width), int(landmark.y * height), landmark.z])
|
37 |
+
|
38 |
+
narray = np.copy(mlist)
|
39 |
+
|
40 |
+
# Vertical line
|
41 |
+
#
|
42 |
+
#
|
43 |
+
# Pick the largest difference (middle of the eyes)
|
44 |
+
leftUp = narray[159]
|
45 |
+
leftDown = narray[145]
|
46 |
+
rightUp = narray[386]
|
47 |
+
rightDown = narray[374]
|
48 |
+
|
49 |
+
# compute left eye distance (vertical)
|
50 |
+
leftUp_x = int(leftUp[0])
|
51 |
+
leftUp_y = int(leftUp[1])
|
52 |
+
leftDown_x = int(leftDown[0])
|
53 |
+
leftDown_y = int(leftDown[1])
|
54 |
+
leftVerDis = math.dist([leftUp_x, leftUp_y],[leftDown_x, leftDown_y])
|
55 |
+
|
56 |
+
# compute right eye distance (vertical)
|
57 |
+
rightUp_x = int(rightUp[0])
|
58 |
+
rightUp_y = int(rightUp[1])
|
59 |
+
rightDown_x = int(rightDown[0])
|
60 |
+
rightDown_y = int(rightDown[1])
|
61 |
+
rightVerDis = math.dist([rightUp_x, rightUp_y],[rightDown_x, rightDown_y])
|
62 |
+
|
63 |
+
# print(f'leftVerDis: {leftVerDis} and rightVerDis: {rightVerDis}')
|
64 |
+
|
65 |
+
# draw a line from left eye top to bottom
|
66 |
+
annotated_image = cv2.line(rgb_image, (int(leftUp_x), int(leftUp_y)), (int(leftDown_x), int(leftDown_y)), (0, 200, 0), 1)
|
67 |
+
|
68 |
+
# draw a line from right eye top to bottom
|
69 |
+
annotated_image = cv2.line(rgb_image, (int(rightUp_x), int(rightUp_y)), (int(rightDown_x), int(rightDown_y)), (0, 200, 0), 1)
|
70 |
+
#
|
71 |
+
#
|
72 |
+
# Horizontonal line
|
73 |
+
#
|
74 |
+
#
|
75 |
+
# Pick the largest difference (middle of the eyes)
|
76 |
+
leftLeft = narray[33]
|
77 |
+
leftRight = narray[133]
|
78 |
+
rightLeft = narray[362]
|
79 |
+
rightRight = narray[263]
|
80 |
+
|
81 |
+
# compute left eye distance (horizontal)
|
82 |
+
leftLeft_x = int(leftLeft[0])
|
83 |
+
leftLeft_y = int(leftLeft[1])
|
84 |
+
leftRight_x = int(leftRight[0])
|
85 |
+
leftRight_y = int(leftRight[1])
|
86 |
+
leftHorDis = math.dist([leftLeft_x, leftLeft_y],[leftRight_x, leftRight_y])
|
87 |
+
|
88 |
+
# compute right eye distance (horizontal)
|
89 |
+
rightLeft_x = int(rightLeft[0])
|
90 |
+
rightLeft_y = int(rightLeft[1])
|
91 |
+
rightRight_x = int(rightRight[0])
|
92 |
+
rightRight_y = int(rightRight[1])
|
93 |
+
rightHorDis = math.dist([rightLeft_x, rightLeft_y],[rightRight_x, rightRight_y])
|
94 |
+
|
95 |
+
# print(f'leftHorDis: {leftHorDis} and rightHorDis: {rightHorDis}')
|
96 |
+
|
97 |
+
# draw a line from left eye top to bottom
|
98 |
+
annotated_image = cv2.line(rgb_image, (int(leftLeft_x), int(leftLeft_y)), (int(leftRight_x), int(leftRight_y)), (0, 200, 0), 1)
|
99 |
+
|
100 |
+
# draw a line from right eye top to bottom
|
101 |
+
annotated_image = cv2.line(rgb_image, (int(rightLeft_x), int(rightLeft_y)), (int(rightRight_x), int(rightRight_y)), (0, 200, 0), 1)
|
102 |
+
#
|
103 |
+
#
|
104 |
+
#
|
105 |
+
#
|
106 |
+
# print(f'leftRatio: {leftVerDis/leftHorDis} and rightRatio: {rightVerDis/rightHorDis}')
|
107 |
+
|
108 |
+
leftRatio = leftVerDis/leftHorDis*100
|
109 |
+
rightRatio = rightVerDis/rightHorDis*100
|
110 |
+
|
111 |
+
|
112 |
+
# left_eyes_bottom = [narray[x] for x in left_eyes_bottom_list]
|
113 |
+
# left_eyes_top = [narray[x] for x in left_eyes_top_list]
|
114 |
+
# right_eyes_bottom = [narray[x] for x in right_eyes_bottom_list]
|
115 |
+
# right_eyes_top = [narray[x] for x in right_eyes_top_list]
|
116 |
+
|
117 |
+
# for p in left_eyes_bottom:
|
118 |
+
# annotated_image = cv2.circle(rgb_image, (int(p[0]), int(p[1])), radius=1, color=(0,0,255), thickness=1)
|
119 |
+
|
120 |
+
# for p in left_eyes_top:
|
121 |
+
# annotated_image = cv2.circle(rgb_image, (int(p[0]), int(p[1])), radius=1, color=(0,0,255), thickness=1)
|
122 |
+
|
123 |
+
# for p in right_eyes_bottom:
|
124 |
+
# annotated_image = cv2.circle(rgb_image, (int(p[0]), int(p[1])), radius=1, color=(0,0,255), thickness=1)
|
125 |
+
|
126 |
+
# for p in right_eyes_top:
|
127 |
+
# annotated_image = cv2.circle(rgb_image, (int(p[0]), int(p[1])), radius=1, color=(0,0,255), thickness=1)
|
128 |
+
|
129 |
+
|
130 |
+
return annotated_image, leftRatio, rightRatio
|
131 |
+
|
132 |
+
def draw_landmarks_on_image(rgb_image, detection_result):
|
133 |
+
face_landmarks_list = detection_result.face_landmarks
|
134 |
+
annotated_image = np.copy(rgb_image)
|
135 |
+
|
136 |
+
# Loop through the detected faces to visualize. Actually, if we detect more than two faces, we will require user closer to the camera
|
137 |
+
for idx in range(len(face_landmarks_list)):
|
138 |
+
face_landmarks = face_landmarks_list[idx]
|
139 |
+
|
140 |
+
# Draw the face landmarks.
|
141 |
+
face_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
|
142 |
+
face_landmarks_proto.landmark.extend([
|
143 |
+
landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in face_landmarks
|
144 |
+
])
|
145 |
+
|
146 |
+
solutions.drawing_utils.draw_landmarks(
|
147 |
+
image=annotated_image,
|
148 |
+
landmark_list=face_landmarks_proto,
|
149 |
+
connections=mp.solutions.face_mesh.FACEMESH_TESSELATION,
|
150 |
+
landmark_drawing_spec=None,
|
151 |
+
connection_drawing_spec=mp.solutions.drawing_styles
|
152 |
+
.get_default_face_mesh_tesselation_style())
|
153 |
+
solutions.drawing_utils.draw_landmarks(
|
154 |
+
image=annotated_image,
|
155 |
+
landmark_list=face_landmarks_proto,
|
156 |
+
connections=mp.solutions.face_mesh.FACEMESH_CONTOURS,
|
157 |
+
landmark_drawing_spec=None,
|
158 |
+
connection_drawing_spec=mp.solutions.drawing_styles
|
159 |
+
.get_default_face_mesh_contours_style())
|
160 |
+
solutions.drawing_utils.draw_landmarks(
|
161 |
+
image=annotated_image,
|
162 |
+
landmark_list=face_landmarks_proto,
|
163 |
+
connections=mp.solutions.face_mesh.FACEMESH_IRISES,
|
164 |
+
landmark_drawing_spec=None,
|
165 |
+
connection_drawing_spec=mp.solutions.drawing_styles
|
166 |
+
.get_default_face_mesh_iris_connections_style())
|
167 |
+
|
168 |
+
return annotated_image
|
169 |
+
|
170 |
+
def plot_face_blendshapes_bar_graph(face_blendshapes):
|
171 |
+
# Extract the face blendshapes category names and scores.
|
172 |
+
face_blendshapes_names = [face_blendshapes_category.category_name for face_blendshapes_category in face_blendshapes]
|
173 |
+
face_blendshapes_scores = [face_blendshapes_category.score for face_blendshapes_category in face_blendshapes]
|
174 |
+
# The blendshapes are ordered in decreasing score value.
|
175 |
+
face_blendshapes_ranks = range(len(face_blendshapes_names))
|
176 |
+
|
177 |
+
fig, ax = plt.subplots(figsize=(12, 12))
|
178 |
+
bar = ax.barh(face_blendshapes_ranks, face_blendshapes_scores, label=[str(x) for x in face_blendshapes_ranks])
|
179 |
+
ax.set_yticks(face_blendshapes_ranks, face_blendshapes_names)
|
180 |
+
ax.invert_yaxis()
|
181 |
+
|
182 |
+
# Label each bar with values
|
183 |
+
for score, patch in zip(face_blendshapes_scores, bar.patches):
|
184 |
+
plt.text(patch.get_x() + patch.get_width(), patch.get_y(), f"{score:.4f}", va="top")
|
185 |
+
|
186 |
+
ax.set_xlabel('Score')
|
187 |
+
ax.set_title("Face Blendshapes")
|
188 |
+
plt.tight_layout()
|
189 |
+
plt.show()
|
__pycache__/Visualization_utilities.cpython-311.pyc
ADDED
Binary file (8.96 kB). View file
|
|
__pycache__/Visualization_utilities.cpython-39.pyc
ADDED
Binary file (4.37 kB). View file
|
|
__pycache__/checkTool.cpython-311.pyc
ADDED
Binary file (9.76 kB). View file
|
|
__pycache__/checkTool.cpython-39.pyc
ADDED
Binary file (4.84 kB). View file
|
|
__pycache__/data_encryption.cpython-311.pyc
ADDED
Binary file (1.11 kB). View file
|
|
__pycache__/data_encryption.cpython-39.pyc
ADDED
Binary file (625 Bytes). View file
|
|
__pycache__/demo.cpython-311.pyc
ADDED
Binary file (4.65 kB). View file
|
|
__pycache__/demo.cpython-39.pyc
ADDED
Binary file (2.61 kB). View file
|
|
__pycache__/extract_pdf.cpython-311.pyc
ADDED
Binary file (5.92 kB). View file
|
|
__pycache__/extract_pdf.cpython-39.pyc
ADDED
Binary file (3.21 kB). View file
|
|
__pycache__/imageSegmentation.cpython-311.pyc
ADDED
Binary file (2.55 kB). View file
|
|
__pycache__/imageSegmentation.cpython-39.pyc
ADDED
Binary file (1.49 kB). View file
|
|
__pycache__/model1.cpython-311.pyc
ADDED
Binary file (1.6 kB). View file
|
|
__pycache__/model1.cpython-39.pyc
ADDED
Binary file (902 Bytes). View file
|
|
__pycache__/model2.cpython-311.pyc
ADDED
Binary file (1.76 kB). View file
|
|
__pycache__/model2.cpython-39.pyc
ADDED
Binary file (968 Bytes). View file
|
|
__pycache__/similarity_check.cpython-311.pyc
ADDED
Binary file (3.12 kB). View file
|
|
__pycache__/similarity_check.cpython-39.pyc
ADDED
Binary file (1.75 kB). View file
|
|
__pycache__/webapp.cpython-311.pyc
ADDED
Binary file (4.58 kB). View file
|
|
blaze_face_short_range.tflite
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4578f35940bf5a1a655214a1cce5cab13eba73c1297cd78e1a04c2380b0152f
|
3 |
+
size 229746
|
checkTool.py
ADDED
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
+
def check_integer(string):
|
4 |
+
if string.isdigit():
|
5 |
+
return True
|
6 |
+
|
7 |
+
for char in string:
|
8 |
+
if char.isdigit():
|
9 |
+
return True
|
10 |
+
return False
|
11 |
+
|
12 |
+
def check_alpha(string):
|
13 |
+
for char in string:
|
14 |
+
if not ((char >= 'a' and char <= 'z') or (char >= 'A' and char <= 'Z') or char == ' '):
|
15 |
+
return False
|
16 |
+
return True
|
17 |
+
|
18 |
+
def is_chinese_name(text):
|
19 |
+
substrings = [text[:1], text[:2], text[:3], text[:4], text[:5], text[:6], text[:7], text[:8]]
|
20 |
+
|
21 |
+
if len(text) > 40:
|
22 |
+
return False
|
23 |
+
|
24 |
+
for substring in substrings:
|
25 |
+
upper_case_sum = 0
|
26 |
+
lower_case_sum = 0
|
27 |
+
space = 0
|
28 |
+
for char in substring:
|
29 |
+
if char >= 'A' and char <= 'Z':
|
30 |
+
upper_case_sum += 1
|
31 |
+
if char >= 'a' and char <= 'z':
|
32 |
+
lower_case_sum += 1
|
33 |
+
if char == ' ':
|
34 |
+
space += 1
|
35 |
+
if upper_case_sum >= 3 and lower_case_sum >= 2 and space >= 1:
|
36 |
+
return True
|
37 |
+
|
38 |
+
return False
|
39 |
+
|
40 |
+
def seperate_name(text):
|
41 |
+
word1 = ""
|
42 |
+
word2 = ""
|
43 |
+
word3 = ""
|
44 |
+
name = text.replace(' ', '')
|
45 |
+
# l = 0
|
46 |
+
# space = 0
|
47 |
+
# for char in text:
|
48 |
+
# if char >= 'A' and char <= 'Z':
|
49 |
+
# l += 1
|
50 |
+
# if char != ' ':
|
51 |
+
# space += 1
|
52 |
+
# else:
|
53 |
+
# word2 = text[l-1:space]
|
54 |
+
# word3 = text[space+1::]
|
55 |
+
# word1 = text[:l - 2]
|
56 |
+
|
57 |
+
# # only two characters
|
58 |
+
# if space == len(text):
|
59 |
+
# word1 = text[:l-1]
|
60 |
+
# word2 = text[l-1::]
|
61 |
+
# name = word1 + ' ' + word2
|
62 |
+
# else:
|
63 |
+
# name = word1 + ' ' + word2 + ' ' + word3
|
64 |
+
return name.lower()
|
65 |
+
|
66 |
+
def validate_hkid(hkid): # omit parentheses
|
67 |
+
hkid = hkid.replace('(', '').replace(')', '')
|
68 |
+
|
69 |
+
weight = [9, 8, 7, 6, 5, 4, 3, 2, 1]
|
70 |
+
values = list('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ') + [None]
|
71 |
+
|
72 |
+
match = re.match('^([A-Z])?([A-Z])([0-9]{6})([0-9A])$', hkid)
|
73 |
+
if not match: return False
|
74 |
+
|
75 |
+
hkidArr = []
|
76 |
+
for g in match.groups():
|
77 |
+
hkidArr += list(g) if g else [g]
|
78 |
+
|
79 |
+
r = sum([values.index(i) * w for i, w in zip(hkidArr, weight)]) % 11
|
80 |
+
|
81 |
+
return r == 0
|
82 |
+
|
83 |
+
def format_HKID(hkid):
|
84 |
+
hkid = hkid.replace('(', '').replace(')', '')
|
85 |
+
idlen = len(hkid)
|
86 |
+
|
87 |
+
match = re.match('^([A-Z])?([A-Z])([0-9]{6})([0-9A])$', hkid)
|
88 |
+
|
89 |
+
hkidArr = []
|
90 |
+
for g in match.groups():
|
91 |
+
hkidArr += list(g) if g else [g]
|
92 |
+
|
93 |
+
formatted_hkid = ''
|
94 |
+
|
95 |
+
index = 0
|
96 |
+
for char in hkidArr:
|
97 |
+
if char != None:
|
98 |
+
formatted_hkid += char
|
99 |
+
if index == idlen - 1:
|
100 |
+
formatted_hkid += '('
|
101 |
+
if index == idlen:
|
102 |
+
formatted_hkid += ')'
|
103 |
+
index += 1
|
104 |
+
|
105 |
+
return formatted_hkid
|
106 |
+
|
107 |
+
def format_issuedate(issuedate):
|
108 |
+
formatted_issuedate = issuedate.replace('(', '').replace(')', '')
|
109 |
+
formatted_issuedate = formatted_issuedate.replace('C', '')
|
110 |
+
return formatted_issuedate
|
111 |
+
|
112 |
+
def is_string_integer(string):
|
113 |
+
try:
|
114 |
+
int(string) # Attempt to convert the string to an integer
|
115 |
+
return True # If successful, the string only contains integers
|
116 |
+
except ValueError:
|
117 |
+
return False # If a ValueError occurs, the string doesn't only contain integers
|
118 |
+
|
119 |
+
def check_issuedate(text):
|
120 |
+
if len(text) < 5 and len(text) > 7 :
|
121 |
+
return False
|
122 |
+
if len(text) > 0 and text[0] == '(':
|
123 |
+
text = text.replace('(', '')
|
124 |
+
elif len(text) > 0 and text[0] == 'C':
|
125 |
+
text = text.replace('C', '')
|
126 |
+
if len(text) > 0 and text[-1] == ')':
|
127 |
+
text = text.replace(')', '')
|
128 |
+
if len(text) != 5:
|
129 |
+
return False
|
130 |
+
if text[2] != '-':
|
131 |
+
return False
|
132 |
+
text = text.replace('-', '')
|
133 |
+
if not is_string_integer(text):
|
134 |
+
return False
|
135 |
+
return True
|
136 |
+
|
137 |
+
def print_info(name, valid_hkid, hkid, issuedate):
|
138 |
+
print(f'Name: {name}')
|
139 |
+
print(f'HKID: {hkid} and validity: {valid_hkid}')
|
140 |
+
print(f'Date of issue: {issuedate}')
|
141 |
+
|
142 |
+
def is_comma_present(string):
|
143 |
+
return ',' in string
|
144 |
+
|
145 |
+
def longest_common_subsequence(s1, s2):
|
146 |
+
m, n = len(s1), len(s2)
|
147 |
+
# Create a 2D table to store the lengths of common subsequences
|
148 |
+
dp = [[0] * (n + 1) for _ in range(m + 1)]
|
149 |
+
|
150 |
+
# Build the table in a bottom-up manner
|
151 |
+
for i in range(1, m + 1):
|
152 |
+
for j in range(1, n + 1):
|
153 |
+
if s1[i - 1] == s2[j - 1]:
|
154 |
+
dp[i][j] = dp[i - 1][j - 1] + 1
|
155 |
+
else:
|
156 |
+
dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])
|
157 |
+
|
158 |
+
# Retrieve the longest common subsequence
|
159 |
+
lcs = []
|
160 |
+
i, j = m, n
|
161 |
+
while i > 0 and j > 0:
|
162 |
+
if s1[i - 1] == s2[j - 1]:
|
163 |
+
lcs.append(s1[i - 1])
|
164 |
+
i -= 1
|
165 |
+
j -= 1
|
166 |
+
elif dp[i - 1][j] > dp[i][j - 1]:
|
167 |
+
i -= 1
|
168 |
+
else:
|
169 |
+
j -= 1
|
170 |
+
|
171 |
+
# Reverse the sequence to get the correct order
|
172 |
+
lcs.reverse()
|
173 |
+
return ''.join(lcs)
|
174 |
+
|
175 |
+
def combine_info(info1, info2):
|
176 |
+
combined_info = []
|
177 |
+
|
178 |
+
print(info1)
|
179 |
+
print(info2)
|
180 |
+
|
181 |
+
if info1[0] == info2[0]:
|
182 |
+
combined_info.append(info1[0]) # Append the variable as-is if it's the same in both models
|
183 |
+
elif info1[0] == '':
|
184 |
+
combined_info.append(info2[0])
|
185 |
+
elif info2[0] == '':
|
186 |
+
combined_info.append(info1[0])
|
187 |
+
else:
|
188 |
+
subseq = longest_common_subsequence(info1[0], info2[0])
|
189 |
+
combined_info.append(subseq)
|
190 |
+
|
191 |
+
if info1[1] == 'True' and info2[1] == 'False':
|
192 |
+
combined_info.append(info1[1])
|
193 |
+
combined_info.append(info1[2])
|
194 |
+
elif info1[1] == 'False' and info2[1] == 'True':
|
195 |
+
combined_info.append(info2[1])
|
196 |
+
combined_info.append(info2[2])
|
197 |
+
elif info1[1] == 'True' and info2[1] == 'True':
|
198 |
+
if info1[2] == info2[2]:
|
199 |
+
combined_info.append(info1[1])
|
200 |
+
combined_info.append(info1[2])
|
201 |
+
else:
|
202 |
+
combined_info.append('False')
|
203 |
+
combined_info.append('Suspicous HKID')
|
204 |
+
|
205 |
+
if info1[3] == info2[3]:
|
206 |
+
combined_info.append(info1[3])
|
207 |
+
else:
|
208 |
+
combined_info.append('Unmatched issuedate')
|
209 |
+
|
210 |
+
# print(combined_info)
|
211 |
+
|
212 |
+
return combined_info
|
213 |
+
|
214 |
+
|
215 |
+
|
216 |
+
# info1 = ['', 'True', 'Z683365(5)', '06-96']
|
217 |
+
# info2 = ['lok wing', 'False', 'Z68336505)', '06-96']
|
218 |
+
# info = combine_info(info1, info2)
|
219 |
+
# print_info(*info)
|
220 |
+
|
221 |
+
|
222 |
+
# text = 'TAMKing Man'
|
223 |
+
# if is_comma_present(text):
|
224 |
+
# text = text.replace(',', '')
|
225 |
+
# if not check_integer(text):
|
226 |
+
# if check_alpha(text) and is_chinese_name(text):
|
227 |
+
# name = seperate_name(text)
|
data1.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"name_on_bs": "laupakching", "address": "rm a,33/f, blk 2b ocean pride 100 tai ho road tsuen wan nt ", "bank": "hangseng", "date": "4feb 2023", "asset": 117923.2, "liabilities": "16965.04", "similarity_score": 100.0, "name_on_id": "laupakching", "hkid": "Y332177(9)", "validity": "True", "issue_date": "11-95"}
|
data_encryption.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
import rsa
|
3 |
+
|
4 |
+
with open("pubkey.pem", 'rb') as f:
|
5 |
+
pubKey = rsa.PublicKey.load_pkcs1(f.read())
|
6 |
+
|
7 |
+
def encrypt(data):
|
8 |
+
for key, value in data.items():
|
9 |
+
value_bytes = value.encode("utf-8")
|
10 |
+
encrypted_value = rsa.encrypt(value_bytes, pubKey)
|
11 |
+
encoded_value = base64.b64encode(encrypted_value)
|
12 |
+
data[key] = encoded_value
|
demo.py
ADDED
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import face_recognition
|
2 |
+
import cv2
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
import imageSegmentation
|
6 |
+
|
7 |
+
from mediapipe.tasks.python import vision
|
8 |
+
import Visualization_utilities as vis
|
9 |
+
|
10 |
+
# Get a reference to webcam #0 (the default one)
|
11 |
+
# video_capture = cv2.VideoCapture(0)
|
12 |
+
|
13 |
+
# Load a sample picture and learn how to recognize it.
|
14 |
+
|
15 |
+
def get_face_encoding(path):
|
16 |
+
HKID_cropped = imageSegmentation.auto_cropping(path)
|
17 |
+
cv2.imwrite('saved/HKID.jpg', HKID_cropped)
|
18 |
+
HKID_image = face_recognition.load_image_file("saved/HKID.jpg")
|
19 |
+
HKID_face_encoding = face_recognition.face_encodings(HKID_image)[0]
|
20 |
+
return HKID_face_encoding
|
21 |
+
|
22 |
+
# HKID_image = face_recognition.load_image_file("saved/HKID.jpg")
|
23 |
+
# HKID_face_encoding = face_recognition.face_encodings(HKID_image)[0]
|
24 |
+
|
25 |
+
# Create arrays of known face encodings and their names
|
26 |
+
# known_face_encodings = [
|
27 |
+
# HKID_face_encoding
|
28 |
+
# ]
|
29 |
+
# known_face_names = [
|
30 |
+
# "Marco"
|
31 |
+
# ]
|
32 |
+
|
33 |
+
# Initialize some variables
|
34 |
+
# face_locations = []
|
35 |
+
# face_encodings = []
|
36 |
+
# face_names = []
|
37 |
+
# process_this_frame = True
|
38 |
+
|
39 |
+
# score = []
|
40 |
+
|
41 |
+
# faces = 0 # number of faces
|
42 |
+
|
43 |
+
# while True:
|
44 |
+
# # Grab a single frame of video
|
45 |
+
# ret, frame = video_capture.read()
|
46 |
+
|
47 |
+
|
48 |
+
|
49 |
+
# # # Draw a label with a name below the face
|
50 |
+
# # cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED)
|
51 |
+
# # font = cv2.FONT_HERSHEY_DUPLEX
|
52 |
+
# # cv2.putText(frame, name, (left + 6, bottom - 6), font, 1.0, (255, 255, 255), 1)
|
53 |
+
|
54 |
+
# # Display the resulting image
|
55 |
+
# cv2.imshow('Video', frame)
|
56 |
+
|
57 |
+
# # Hit 'q' on the keyboard to quit!
|
58 |
+
# if cv2.waitKey(1) & 0xFF == ord('q'):
|
59 |
+
# break
|
60 |
+
|
61 |
+
|
62 |
+
def process_frame(frame, process_this_frame, face_locations, faces, face_names, score):
|
63 |
+
|
64 |
+
hkid_face_encoding = get_face_encoding("image")
|
65 |
+
|
66 |
+
known_face_encodings = [
|
67 |
+
hkid_face_encoding
|
68 |
+
]
|
69 |
+
|
70 |
+
known_face_names = [
|
71 |
+
"recognized"
|
72 |
+
]
|
73 |
+
|
74 |
+
# Only process every other frame of video to save time
|
75 |
+
if process_this_frame:
|
76 |
+
face_names = []
|
77 |
+
# Resize frame of video to 1/4 size for faster face recognition processing
|
78 |
+
small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)
|
79 |
+
|
80 |
+
# Convert the image from BGR color (which OpenCV uses) to RGB color (which face_recognition uses)
|
81 |
+
rgb_small_frame = cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB)
|
82 |
+
|
83 |
+
# Find all the faces and face encodings in the current frame of video
|
84 |
+
face_locations = face_recognition.face_locations(rgb_small_frame)
|
85 |
+
face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)
|
86 |
+
faces = len(face_encodings) # number of faces
|
87 |
+
|
88 |
+
for face_encoding in face_encodings:
|
89 |
+
# See if the face is a match for the known face(s)
|
90 |
+
matches = face_recognition.compare_faces(known_face_encodings, face_encoding)
|
91 |
+
name = "Unknown"
|
92 |
+
|
93 |
+
# # If a match was found in known_face_encodings, just use the first one.
|
94 |
+
# if True in matches:
|
95 |
+
# first_match_index = matches.index(True)
|
96 |
+
# name = known_face_names[first_match_index]
|
97 |
+
|
98 |
+
# Or instead, use the known face with the smallest distance to the new face
|
99 |
+
face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)
|
100 |
+
best_match_index = np.argmin(face_distances)
|
101 |
+
print(face_distances)
|
102 |
+
if matches[best_match_index] and face_distances[best_match_index] < 0.45:
|
103 |
+
score.append(face_distances[best_match_index])
|
104 |
+
name = known_face_names[best_match_index]
|
105 |
+
else:
|
106 |
+
score = []
|
107 |
+
|
108 |
+
face_names.append(name)
|
109 |
+
|
110 |
+
# if len(score) > 20:
|
111 |
+
# avg_score = sum(score) / len(score)
|
112 |
+
|
113 |
+
# Display the results
|
114 |
+
if faces > 1 :
|
115 |
+
# Define the text and font properties
|
116 |
+
text = "More than 1 person detected!"
|
117 |
+
font = cv2.FONT_HERSHEY_DUPLEX
|
118 |
+
font_scale = 1
|
119 |
+
font_thickness = 2
|
120 |
+
|
121 |
+
# Calculate the text size
|
122 |
+
window_height = frame.shape[0]
|
123 |
+
window_width = frame.shape[1]
|
124 |
+
text_size, _ = cv2.getTextSize(text, font, font_scale, font_thickness)
|
125 |
+
|
126 |
+
# Calculate the text position
|
127 |
+
text_x = int((window_width - text_size[0]) / 2)
|
128 |
+
text_y = window_height - int(text_size[1] / 2)
|
129 |
+
|
130 |
+
cv2.putText(frame, text, (text_x, text_y), font, font_scale, (255, 255, 255), font_thickness, cv2.LINE_AA)
|
131 |
+
|
132 |
+
for (top, right, bottom, left), name in zip(face_locations, face_names):
|
133 |
+
# Scale back up face locations since the frame we detected in was scaled to 1/4 size
|
134 |
+
top *= 4
|
135 |
+
right *= 4
|
136 |
+
bottom *= 4
|
137 |
+
left *= 4
|
138 |
+
|
139 |
+
# Draw a box around the face
|
140 |
+
cv2.rectangle(frame, (left, top), (right, bottom), (65, 181, 41), 4)
|
141 |
+
|
142 |
+
# Define the name box properties
|
143 |
+
name_box_color = (44, 254, 0)
|
144 |
+
name_box_alpha = 0.7
|
145 |
+
name_box_thickness = -1
|
146 |
+
|
147 |
+
# Define the text properties
|
148 |
+
font = cv2.FONT_HERSHEY_TRIPLEX
|
149 |
+
font_scale = 1
|
150 |
+
font_thickness = 2
|
151 |
+
text_color = (255, 255, 255)
|
152 |
+
|
153 |
+
# Calculate the text size
|
154 |
+
text_width, text_height = cv2.getTextSize(name, font, font_scale, font_thickness)[0]
|
155 |
+
|
156 |
+
# Draw the name box
|
157 |
+
cv2.rectangle(frame, (left, bottom - 35), (right, bottom),
|
158 |
+
name_box_color, name_box_thickness)
|
159 |
+
cv2.rectangle(frame, (left, bottom - 35), (right, bottom),
|
160 |
+
name_box_color, cv2.FILLED)
|
161 |
+
|
162 |
+
# Draw the name text
|
163 |
+
cv2.putText(frame, name, (left + 70, bottom - 6), font, font_scale, text_color, font_thickness)
|
164 |
+
|
165 |
+
process_this_frame = process_this_frame
|
166 |
+
|
167 |
+
frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
|
168 |
+
|
169 |
+
return frame, process_this_frame, face_locations, faces, face_names, score
|
170 |
+
|
171 |
+
def convert_distance_to_percentage(distance, threshold):
|
172 |
+
if distance < threshold:
|
173 |
+
score = 80
|
174 |
+
score += distance / 0.45 * 20
|
175 |
+
else:
|
176 |
+
score = (1 - distance) * 100
|
177 |
+
return score
|
178 |
+
|
179 |
+
# percent = convert_distance_to_percentage(avg_score, 0.45)
|
180 |
+
|
181 |
+
# print(f'avg_score = {percent:.2f}% : Approved!')
|
182 |
+
|
183 |
+
# # Release handle to the webcam
|
184 |
+
# video_capture.release()
|
185 |
+
# cv2.destroyAllWindows()
|
extract_pdf.py
ADDED
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
################# cnocr ##################
|
2 |
+
from cnocr import CnOcr
|
3 |
+
|
4 |
+
def validate(text):
|
5 |
+
invalid_list = [' ',',']
|
6 |
+
for char in invalid_list:
|
7 |
+
text = text.replace(char, '')
|
8 |
+
return text
|
9 |
+
|
10 |
+
def check_bank(text):
|
11 |
+
text = text.replace(' ', '')
|
12 |
+
bank_list = ['bankofchina','hangseng','hsbc','sc']
|
13 |
+
for bank in bank_list:
|
14 |
+
if bank in text:
|
15 |
+
return bank
|
16 |
+
else:
|
17 |
+
return False
|
18 |
+
|
19 |
+
def check_bank_name(img_path):
|
20 |
+
# BOCH - "Consolidated Statement 2023-01-01"
|
21 |
+
# HangSeng - "Statement of Prestige Banking 2023-03-0" OR "Statement of Preferred Banking 2023-03-07"
|
22 |
+
# HSBC - "Statement - HSBC One Account 2023-02-10"
|
23 |
+
# Standard Chartered - "statementOfAccount 2023-02-01"
|
24 |
+
standard_names = {'boch': "Consolidated Statement",
|
25 |
+
'hangseng': "Statement of",
|
26 |
+
'hsbc': "Statement - HSBC One Account",
|
27 |
+
'sc': "statementOfAccount"}
|
28 |
+
for bank_name in standard_names:
|
29 |
+
if bank_name in str(img_path) or standard_names[bank_name] in str(img_path):
|
30 |
+
return bank_name
|
31 |
+
|
32 |
+
def check_mr(text):
|
33 |
+
openings = ['mr', 'ms', 'miss', 'mrs']
|
34 |
+
words = text.lower().split()
|
35 |
+
if words and words[0] in openings:
|
36 |
+
return ''.join(words[1:])
|
37 |
+
else:
|
38 |
+
return text
|
39 |
+
|
40 |
+
def get_info_from_bank(img_path, file_name):
|
41 |
+
# Running the model
|
42 |
+
ocr = CnOcr(rec_model_name='densenet_lite_136-gru')
|
43 |
+
out = ocr.ocr(img_path)
|
44 |
+
# Data
|
45 |
+
bank_data = {
|
46 |
+
"name_on_bs": "",
|
47 |
+
"address": "",
|
48 |
+
"bank": check_bank_name(file_name),
|
49 |
+
"date": "",
|
50 |
+
"asset": 0.0,
|
51 |
+
"liabilities": ""
|
52 |
+
}
|
53 |
+
|
54 |
+
asset_y = [722,747]
|
55 |
+
asset_equa = ''
|
56 |
+
asset_iterations = 2
|
57 |
+
liabilities_y = [747,800]
|
58 |
+
count = 0
|
59 |
+
invalid_list = ['', ' ', ',']
|
60 |
+
|
61 |
+
for item in out:
|
62 |
+
detected_text = item['text']
|
63 |
+
raw_detected_text = detected_text.lower()
|
64 |
+
#raw_detected_text = detected_text
|
65 |
+
positions = item['position']
|
66 |
+
if raw_detected_text in invalid_list or raw_detected_text is None:
|
67 |
+
pass
|
68 |
+
elif ((positions[0][0] >= 147) and (positions[0][1] >= 265) and (positions[2][0] <= 400) and (positions[2][1] <= 295)):
|
69 |
+
if (raw_detected_text != ''): # name
|
70 |
+
bank_data["name_on_bs"] += raw_detected_text
|
71 |
+
bank_data["name_on_bs"] = check_mr(bank_data["name_on_bs"])
|
72 |
+
elif ((positions[0][0] >= 113) and (positions[0][1] >= 291) and (positions[2][0] <= 500) and (positions[2][1] <= 381)):
|
73 |
+
if (raw_detected_text != ''): # position
|
74 |
+
bank_data["address"] += raw_detected_text
|
75 |
+
bank_data["address"] += ' '
|
76 |
+
elif ((positions[0][0] >= 996) and (positions[0][1] >= 289) and (positions[2][0] <= 1083) and (positions[2][1] <= 314)):
|
77 |
+
if (raw_detected_text != ''): # statement date
|
78 |
+
bank_data["date"] += raw_detected_text
|
79 |
+
elif ((positions[0][0] >= 900) and (positions[0][1] >= asset_y[0]) and (positions[2][0] <= 1120) and (positions[2][1] <= asset_y[1])): #
|
80 |
+
# take a look at the y0/y1 position
|
81 |
+
if (raw_detected_text != '' and count <= asset_iterations and ('DR' not in raw_detected_text)): # asset
|
82 |
+
asset_equa += raw_detected_text
|
83 |
+
asset_equa += '+'
|
84 |
+
raw_detected_text = raw_detected_text.replace(',', '')
|
85 |
+
#raw_detected_text = validate(raw_detected_text).lower()
|
86 |
+
asset_float = float(raw_detected_text)
|
87 |
+
bank_data["asset"] += asset_float
|
88 |
+
asset_y[0] += 21
|
89 |
+
asset_y[1] += 27
|
90 |
+
liabilities_y[1] += 27
|
91 |
+
count += 1
|
92 |
+
elif 'DR' in raw_detected_text:
|
93 |
+
bank_data["liabilities"] = validate(raw_detected_text)
|
94 |
+
elif ((positions[0][0] >= 900) and (positions[0][1] >= liabilities_y[0]) and (positions[2][0] <= 1130) and (positions[2][1] <= liabilities_y[1])):
|
95 |
+
if (raw_detected_text != '' and 'dr' in raw_detected_text): # liabilities
|
96 |
+
raw_detected_text = raw_detected_text.replace('dr','')
|
97 |
+
bank_data["liabilities"] = validate(raw_detected_text)
|
98 |
+
elif check_bank(raw_detected_text) != False: # bank
|
99 |
+
bank_data["bank"] = check_bank(raw_detected_text)
|
100 |
+
|
101 |
+
|
102 |
+
# print('------------From bank statement------------')
|
103 |
+
# print(f'Name: {bank_data["name_on_bs"]}')
|
104 |
+
# print(f'Address: {bank_data["address"]}')
|
105 |
+
# print(f'Bank: {bank_data["bank"]}')
|
106 |
+
# print(f'Date: {bank_data["date"]}')
|
107 |
+
# print(f'Asset: {asset_equa} = {bank_data["asset"]}')
|
108 |
+
# print(f'Liabilities: {bank_data["liabilities"]}')
|
109 |
+
# post_data(bank_data["bank"], bank_data["name_on_bs"], bank_data["address"], bank_data["asset"], bank_data["liabilities"], bank_data["date"])
|
110 |
+
return bank_data
|
111 |
+
|
112 |
+
########## Posting data through API ############
|
113 |
+
import requests
|
114 |
+
import data_encryption
|
115 |
+
# POST /api/v1/users HTTP/1.1
|
116 |
+
|
117 |
+
def post_data(bank, name, address, asset, liabilities, date):
|
118 |
+
# endpoint = 'http://ipygg-api-test-env.ap-east-1.elasticbeanstalk.com/SBT/api/v1/users'
|
119 |
+
data = {
|
120 |
+
"endpoint": "/SBT",
|
121 |
+
"apiType": "store_statement_verif",
|
122 |
+
"requestId": 'request_1234',
|
123 |
+
"userId": 'user1',
|
124 |
+
"bank": bank,
|
125 |
+
"nameStatement": name,
|
126 |
+
"address": address,
|
127 |
+
"asset": str(asset),
|
128 |
+
"liability": liabilities,
|
129 |
+
"statementDate": date
|
130 |
+
}
|
131 |
+
|
132 |
+
encrypted_data = data_encryption.encrypt(data)
|
133 |
+
|
134 |
+
# request = requests.post(url=endpoint, data=encrypted_data)
|
135 |
+
|
136 |
+
# def extract_pdf_data(img_path='hangseng_page-0001.jpg'):
|
137 |
+
# page_number = 1
|
138 |
+
# images = f'hangseng_page-000{page_number}.jpg'
|
139 |
+
# get_info_from_bank(img_path)
|
extraction_data.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
################# cnocr ##################
|
2 |
+
from cnocr import CnOcr
|
3 |
+
from pdfquery import PDFQuery
|
4 |
+
import openai
|
5 |
+
|
6 |
+
def validate(text):
|
7 |
+
invalid_list = [' ',',']
|
8 |
+
for char in invalid_list:
|
9 |
+
text = text.replace(char, '')
|
10 |
+
return text
|
11 |
+
|
12 |
+
def check_bank(text):
|
13 |
+
text = text.replace(' ', '')
|
14 |
+
bank_list = ['bankofchina','hangseng','hsbc','sc']
|
15 |
+
for bank in bank_list:
|
16 |
+
if bank in text:
|
17 |
+
return bank
|
18 |
+
else:
|
19 |
+
return False
|
20 |
+
|
21 |
+
def check_bank_name(img_path):
|
22 |
+
# BOCH - "Consolidated Statement 2023-01-01"
|
23 |
+
# HangSeng - "Statement of Prestige Banking 2023-03-0" OR "Statement of Preferred Banking 2023-03-07"
|
24 |
+
# HSBC - "Statement - HSBC One Account 2023-02-10"
|
25 |
+
# Standard Chartered - "statementOfAccount 2023-02-01"
|
26 |
+
standard_names = {'boch': "Consolidated Statement",
|
27 |
+
'hangseng': "Statement of",
|
28 |
+
'hsbc': "Statement - HSBC One Account",
|
29 |
+
'sc': "statementOfAccount"}
|
30 |
+
for bank_name in standard_names:
|
31 |
+
if bank_name in str(img_path) or standard_names[bank_name] in str(img_path):
|
32 |
+
return bank_name
|
33 |
+
|
34 |
+
def check_mr(text):
|
35 |
+
openings = ['mr', 'ms', 'miss', 'mrs']
|
36 |
+
words = text.lower().split()
|
37 |
+
if words and words[0] in openings:
|
38 |
+
return ''.join(words[1:])
|
39 |
+
else:
|
40 |
+
return text
|
41 |
+
|
42 |
+
def get_info_from_bank(img_path, pdf_path):
|
43 |
+
# Running the model
|
44 |
+
ocr = CnOcr(rec_model_name='densenet_lite_136-gru')
|
45 |
+
out = ocr.ocr(img_path)
|
46 |
+
|
47 |
+
# Data
|
48 |
+
bank_data = {
|
49 |
+
"name_on_bs": "",
|
50 |
+
"address": "",
|
51 |
+
"bank": "",
|
52 |
+
"date": "",
|
53 |
+
"asset": 0.0,
|
54 |
+
"liabilities": ""
|
55 |
+
}
|
56 |
+
|
57 |
+
# {
|
58 |
+
# "Customer Name": "MR CHIU CHUNG YIN",
|
59 |
+
# "Address": "FLAT 13,8/F,OILOK HOUSE, YAU OI ESTATE, TUEN MUN NT",
|
60 |
+
# "Bank Name": "HSBC",
|
61 |
+
# "Statement Issue Date": "10 January 2023",
|
62 |
+
# "Total Asset": "7,265.80",
|
63 |
+
# "Total Liability": "7,265.80"
|
64 |
+
# }
|
65 |
+
|
66 |
+
openai.api_key = "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"
|
67 |
+
invalid_list = [' ',',']
|
68 |
+
data_set_1 = []
|
69 |
+
|
70 |
+
pdf = PDFQuery(pdf_path)
|
71 |
+
pdf.load(0)
|
72 |
+
text_elements = pdf.pq('LTTextLineHorizontal').text()
|
73 |
+
text_elements = text_elements.replace("cid:", "")
|
74 |
+
|
75 |
+
for item in out:
|
76 |
+
if item['text'] not in invalid_list:
|
77 |
+
data_set_1.append(item['text'])
|
78 |
+
|
79 |
+
completion = openai.ChatCompletion.create(
|
80 |
+
model = "gpt-3.5-turbo",
|
81 |
+
temperature = 0.2,
|
82 |
+
messages = [
|
83 |
+
{"role": "system", "content": "You are an AI assistant for extracting data from bank statements. Uppercase and lowercase letters are the same. List results in a dictionary format."},
|
84 |
+
{"role": "user", "content": f"Extract data from the following 2 sets of text: {data_set_1} and {text_elements}. (1.) Data that locate in the front part of the text: customer full name, address in Hong Kong (including flat, floor, court/estate, region in Hong Kong), bank name, bank statement issue date (verly likely to be within 1-2 years), (2.) Data that mainly locate in the other part of the text: total asset (including investments and deposits) and total liability (often contains DR and includes credit card but might be zero) of the current month."},
|
85 |
+
# {"role": "assistant", "content": "Q: How do you make 7 even? A: Take away the s."},
|
86 |
+
# {"role": "user", "content": "Write one related to programmers."}
|
87 |
+
]
|
88 |
+
)
|
89 |
+
bs_data = completion['choices'][0]['message']['content']
|
90 |
+
print(bs_data)
|
91 |
+
return bs_data
|
92 |
+
|
93 |
+
# get_info_from_bank('hangseng_page-0001.jpg','hangseng.pdf')
|
94 |
+
# get_info_from_bank('hsbc_one_account_page-0001.jpg','hsbc_one_account.pdf')
|
95 |
+
# get_info_from_bank('boch_consolidated.jpg','boch_consolidated.pdf')
|
96 |
+
get_info_from_bank('hsbc_one_account_page-10001.jpg','hsbc_one_account_page-10001.pdf')
|
imageSegmentation.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This program is designed to auto crop the face on a given image
|
2 |
+
# It is required to change the image into gray format to satisfy the pre-trained model requirement
|
3 |
+
|
4 |
+
import cv2
|
5 |
+
import numpy as np
|
6 |
+
import os
|
7 |
+
|
8 |
+
import mediapipe as mp
|
9 |
+
from mediapipe.tasks import python
|
10 |
+
from mediapipe.tasks.python import vision
|
11 |
+
|
12 |
+
import cv2
|
13 |
+
|
14 |
+
from pathlib import Path
|
15 |
+
|
16 |
+
# auto crop the image in the given dir
|
17 |
+
|
18 |
+
base_options = python.BaseOptions(model_asset_path='blaze_face_short_range.tflite')
|
19 |
+
options = vision.FaceDetectorOptions(base_options=base_options)
|
20 |
+
detector = vision.FaceDetector.create_from_options(options)
|
21 |
+
|
22 |
+
def crop(
|
23 |
+
image,
|
24 |
+
detection_result
|
25 |
+
) -> np.ndarray :
|
26 |
+
annotated_image = image.copy()
|
27 |
+
height, width, _ = image.shape
|
28 |
+
|
29 |
+
# Here assume we only detect one face
|
30 |
+
for detection in detection_result.detections:
|
31 |
+
# Crop detected face
|
32 |
+
bbox = detection.bounding_box
|
33 |
+
cropped_img = image[bbox.origin_y - 90: bbox.origin_y + bbox.height + 30, bbox.origin_x - 80:bbox.origin_x + bbox.width + 35]
|
34 |
+
|
35 |
+
return cropped_img
|
36 |
+
|
37 |
+
def auto_cropping(dir):
|
38 |
+
|
39 |
+
files = os.listdir(dir) # list of files in directory
|
40 |
+
|
41 |
+
for file in files:
|
42 |
+
|
43 |
+
file_dir = Path(dir + "/" + file)
|
44 |
+
abs_path = file_dir.resolve()
|
45 |
+
|
46 |
+
img = mp.Image.create_from_file(str(abs_path))
|
47 |
+
|
48 |
+
detection_result = detector.detect(img)
|
49 |
+
save_path = 'saved'
|
50 |
+
|
51 |
+
image_copy = np.copy(img.numpy_view())
|
52 |
+
annotated_image = crop(image_copy, detection_result)
|
53 |
+
rgb_annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
|
54 |
+
|
55 |
+
return rgb_annotated_image
|
56 |
+
|
57 |
+
# auto_cropping("image") # <----------- !!!!change address here!!!! ------------------> #
|
58 |
+
|
59 |
+
# The current problem (6/2/2023) is that the model may recognize some cartoon face as human face,
|
60 |
+
# my idea is to use another model to classify if the cropped image is real human face
|
model1.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from cnocr import CnOcr
|
2 |
+
import pandas as pd
|
3 |
+
import checkTool
|
4 |
+
|
5 |
+
def model1(path):
|
6 |
+
|
7 |
+
ocr = CnOcr(rec_model_name='en_PP-OCRv3')
|
8 |
+
# ocr = CnOcr(rec_model_name='densenet_lite_136-fc')
|
9 |
+
out = ocr.ocr(path)
|
10 |
+
|
11 |
+
#print(out)
|
12 |
+
|
13 |
+
name = ''
|
14 |
+
scanned_number = len(out)
|
15 |
+
hkid = out[scanned_number-1]['text']
|
16 |
+
issuedate = ''
|
17 |
+
|
18 |
+
for data in out:
|
19 |
+
text = data['text']
|
20 |
+
score = data['score']
|
21 |
+
position = data['position']
|
22 |
+
|
23 |
+
if not checkTool.check_integer(text):
|
24 |
+
if checkTool.check_alpha(text) and checkTool.is_chinese_name(text):
|
25 |
+
name = checkTool.seperate_name(text)
|
26 |
+
|
27 |
+
# check if the data is issuedate
|
28 |
+
if checkTool.check_issuedate(text):
|
29 |
+
issuedate = checkTool.format_issuedate(text)
|
30 |
+
|
31 |
+
|
32 |
+
|
33 |
+
if checkTool.validate_hkid(hkid=hkid):
|
34 |
+
valid_hkid = 'True'
|
35 |
+
hkid = checkTool.format_HKID(out[scanned_number-1]['text'])
|
36 |
+
else:
|
37 |
+
valid_hkid = 'False'
|
38 |
+
|
39 |
+
# checkTool.print_info(name, hkid, valid_hkid, issuedate)
|
40 |
+
|
41 |
+
return [name, valid_hkid, hkid, issuedate]
|
42 |
+
|
43 |
+
# example for testing
|
44 |
+
# info = model1('IMG_4495.jpg')
|
45 |
+
# print(info)
|
46 |
+
# checkTool.print_info(*info)
|
model2.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from cnocr import CnOcr
|
2 |
+
import pandas as pd
|
3 |
+
import checkTool
|
4 |
+
|
5 |
+
# img_fp = 'IMG_4499.jpg'
|
6 |
+
|
7 |
+
def model2(path):
|
8 |
+
|
9 |
+
ocr = CnOcr(rec_model_name='densenet_lite_136-gru')
|
10 |
+
# ocr = CnOcr(rec_model_name='densenet_lite_136-fc')
|
11 |
+
out = ocr.ocr(path)
|
12 |
+
|
13 |
+
name = ''
|
14 |
+
scanned_number = len(out)
|
15 |
+
hkid = out[scanned_number-1]['text']
|
16 |
+
issuedate = ''
|
17 |
+
|
18 |
+
for data in out:
|
19 |
+
text = data['text']
|
20 |
+
score = data['score']
|
21 |
+
position = data['position']
|
22 |
+
|
23 |
+
if checkTool.is_comma_present(text):
|
24 |
+
text = text.replace(',', '')
|
25 |
+
if not checkTool.check_integer(text):
|
26 |
+
if checkTool.check_alpha(text) and checkTool.is_chinese_name(text):
|
27 |
+
name = checkTool.seperate_name(text)
|
28 |
+
|
29 |
+
# check if the data is issuedate
|
30 |
+
if checkTool.check_issuedate(text):
|
31 |
+
issuedate = checkTool.format_issuedate(text)
|
32 |
+
|
33 |
+
if checkTool.validate_hkid(hkid=hkid):
|
34 |
+
valid_hkid = 'True'
|
35 |
+
hkid = checkTool.format_HKID(out[scanned_number-1]['text'])
|
36 |
+
else:
|
37 |
+
valid_hkid = 'False'
|
38 |
+
|
39 |
+
# checkTool.print_info(name, hkid, valid_hkid, issuedate)
|
40 |
+
|
41 |
+
return [name, valid_hkid, hkid, issuedate]
|
42 |
+
|
43 |
+
# # example for testing
|
44 |
+
# info = model2('IMG_4496.jpg')
|
45 |
+
# print(info)
|
46 |
+
# checkTool.print_info(*info)
|
pubkey.pem
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
-----BEGIN RSA PUBLIC KEY-----
|
2 |
+
MIICCgKCAgEAgTkWgzMVHIX/mYOZ5F6GIeZ5WastU7LWNmCSi2kTZQr2OjGsATCU
|
3 |
+
uD/ZrxVObpZPw4vvXax0LGkIyDx7QG4psVEKx26IUtvn7Br+CJyATmK2dW9sCkwY
|
4 |
+
N4x/67F9a1N8yOKhEvkcBtplphZfqZTCZ3d4VUShBt9gYGlO4odeXZ3cZLm+N9Hc
|
5 |
+
MEP6qMIoH1KBNjhcx60BvLbODHkYRup7YAcOh/cOEC/WNkZqQPPYomcVyXat6UKS
|
6 |
+
L1Vf/s1RnhOStu4JmYS1se39LRAxKI+xADZ7D+y7bhcBGykT7evEPGCwUAh++y6y
|
7 |
+
Wolj9HS5oIkcxq+Rj3HLlm7ofDubeBpuOWF2xVh0jYpSFHUYkVChssmfb0WFwxrt
|
8 |
+
YQj8aqX2C9taoWQpHdCcANJSvaM1YvLRPe8pHRpCjm/BrvxddxMNY1gCWpBCP7ym
|
9 |
+
WAuJShb/kkdDnQ+exS9n/UbzRMzYoHnKroQL9CPn26mbzlEO7mMOj1h34rQZeTD1
|
10 |
+
OAFEC1JFBL8LCMRkh+RT3UVpHTSFn/Oc2Gq912MivUrHbeK5Y8lPZOrEmvvxeqDB
|
11 |
+
uOPOMpkh9LWEoGlO4GLvnMhhbINt1OnuUIRCqvOh3jXUXoseVnAMWv1QTRyreq4h
|
12 |
+
d8GlMUR5U4dmc2XHncy1riVDVV4FYSAL2N94utNDgztKUkGL6i2Z5AECAwEAAQ==
|
13 |
+
-----END RSA PUBLIC KEY-----
|
request_json/__pycache__/sbt_request_generator.cpython-311.pyc
ADDED
Binary file (4.72 kB). View file
|
|
request_json/__pycache__/sbt_request_generator.cpython-39.pyc
ADDED
Binary file (2.56 kB). View file
|
|
request_json/request_legalDocument.json
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"request": {
|
3 |
+
"method": "POST",
|
4 |
+
"header": [],
|
5 |
+
"body": {
|
6 |
+
"mode": "formdata",
|
7 |
+
"formdata": [
|
8 |
+
{
|
9 |
+
"key": "requestId",
|
10 |
+
"value": "AhuqYhSgE9IQGpGeylwlD+zt9Q1o9typDou/GI0AIIxkoLL4tP8YmA41oEs3iz0UGtw2NzMjGSi/rmZogBuie8QlhOaEER+mysk/JRSj1YmMMnnxrfTBAzaa/FWF9MQ1OwpHbQc+TJuLBnuW/HnrVB/uMcl2klk0KyxD/rOgCRGW5W9ANZuWtBKRoO5ZuTKvcB3uGRc6h2iPuarUE6OcPyRL4byn2fi3ZAdAo3Uh61nCy090Gywf0qQRl54GP9uqv7R136Ilb2RNwE6cccC0F+C43eEXdnXkMdK3+P/ZNvNtsNTOMn2YfxhqaoxLef25dhukNWwA4k0JfR2waMXeMPrUxpzlkRHtN7V3os8q12mPvV2h/CSZelDw5GZWlUZW3A4rKWnLSX72/T05e1LdJ5mplkGPhSEoLUlEOkDOPmODBn6euy4mTAwAKUYLlVgbaHuG8CAMuKz3eQ6wD8RKajJF8+Y7BjnUH/fWZqLE6+QtiHjxGczKv7wxzD4kfNTj1DeZ9SHMf70IBb91URRMET1n9DTsq05FueI+/qm6VvV+TG1nbxuBBEC+yyTkB1Zjc4wxRUuqfZ7Rql4f7xfgQRYn28r0dEmgnGluqDMp7eDL6Kl1Xe7Hm0qblwf6jB4OxYY+sSCqhyIevGYJZQ1J9NwAuRhaZrwd4EwWs0G0efQ=",
|
11 |
+
"description": "statement",
|
12 |
+
"type": "text"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"key": "userId",
|
16 |
+
"value": "XV6W6EroodCeMfcuIt/X0Cw16NEHYj1ntBRHvKBxFHxroIGW4bzjIx/Z52zvOYhCu8p9iFODnq6diMZ5YjPmix34sUauhAU+Crthkh7pxa0nYsZs+ZgCGx3gVaPN4z+MmKA8PHofTZ0cH59xXLL0v5hwtVEUmQ3K5AH/7bUYl87e5J+75ml9JeoI+a/iPkAfTvUk8Q4hEeVhc0TWkbRT+u9kzMD0Ej3tPDgHILw/PifP7LYNDi1VZ9sHcaPA4irRztVwOR+9swYd56BZ2ROeN4zqkd32E+Hy8EcAGguextnZe0gWcZVbuUF/M/Jt3PSYua4rJTvQaZEeK8CKOrdnby3ZEYnh5Q7meD8zd+H0myTB9xzIi+/03gqDAbF68MTPfA3Ur8c0TE3vIBkOVub7YM0hQZMKgjEbyUSWlkxqjpYgR/fTZHxd9nk7zSfRAHXypuiH/GyQf0v7k1hbDmCeSL0995iTFM00VS3nj/Ik4b61ioFALyluxrMO/Mz4UJOJD5TlrTIgqme0ibez0tSb7OReHxGa8X2OooRYftckpQs5YxdO5ifQleGg05ZV/oUP9kzW6k7SvIIqyEuRWgAVbMcm2AJcJG5/PZR7CK2HiT0thK6wHAdKxO5gsRdxvC7tk80SSWJ892nIBxp77iY5TAnbBu5cNYBZcKiWtu6U8RM="
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"key": "endpoint",
|
20 |
+
"value": "X5Ut0e359J19WgOjS/N4ui9HIr7ol/1cMhuYjB7BSiquCG/xt6dvSVtM3mbPD2cu+7FN/EoM8RNOfTq5kXO/8naiZ5HKc+LPcZfrLJDPrL33kwyw4uKxY/NEtQBc8pskTcSH5RPBX1/6Xmdu0Nb29GtAJqyZk/F65xPBrxY11W2SJlg8s58R72F6Y9urQTrankAa4xt58jL+mo6DQJjJLw+pLu5RiYU2lfp8OyDPAPfpRfMvmHT11sox1ia5RcWeKosQB0AU09JWxcbtpNYQZP+evZElkVWtOkUoflR+6ZBppjJwnu5QyTZr2wKASTmG2v2PTqeqbtns2Hagrmr8DkLUH/YNIcU7L193ffaYIoBGq21d+ysU80Glri1Xi1jO6jmJye66ansHkSVi1CPRiZPwhKB7PNo65VXIA2fYaGMnAEUOfRfZ8/XbhNOPOUX54N/gqvmiv/IQCwHvzGyHpXBJN3yFheSnk7T4d/prz7ginwUGnBm3R/+IhWvqIg8owxMqUayVmZnBVJ6UuTHDYGmg+lZ9A+R0eOKfSwl6W3uD2enKR1XBC7PkLZUA8hUdx4ZWEbkCcR5VnmsN32iLqVdrX8EdTl4kFkD+fHH0tjJT1RpL/9s8J9V7UPERC+V49OlQYLOaxGuQqHxp5KAhfGJjXCCzvo/Ikj1WTs8zvgg=",
|
21 |
+
"description": "SBT",
|
22 |
+
"type": "text"
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"key": "apiType",
|
26 |
+
"value": "OZoO3DjuNYXElX/9GSRKvvnKOLLK47Ps0q77iX6kvEABqbvpDc0SZiSaOLYyx8ZpWFbhO0jzBxCCVdoJsC1POxn4hjYhc22zhSKHL203YZ38ROIOJzvB4vYoBwy4DjLXQmMf8XbuDjug/tQ9qCCI6pLIaQSvlnAh6z5zjtD0ovqN6BGX+UZxwvheMkXI3yv35NGE0HWvCPwAj2Xv6ZJ3UYATv60j9nZjH9ih6+P8WgtDvDXdzlOFmjwPGiFMvzLSERkhvoFxq/liaXSAGORW/gBpfX4Tp6Gf6cWz/5ochu9XL9ojwN/JXkKs60iN8Vrha1nIjF5SHyFhsI9y6sVeipU/gsrAKETmE3CpF6rFBoFDm+3VuZnKDFFDQ4U+J3f0UFOjj6diUrzOiq4nH6YgYLP7fn/mDuSweIVF53/vbc7PPbpgipy1K9xqlaRw9uD6vwew+NZOm9VwuqSZFmFdlch9yi3FA82dHN1Fb7HvopUpBzvSVXgW79oyIVZwzw3ifng2rxqoBbUTzg2w7qy6JoWIxoo2IZTYlLNEP+XEUpNxjyCLtuRgoliYowAZABVwBQEQufqAqrZ9OrjQlf8y5Ar92tgfW25BZ+BgwVW3ILA/wthWWHTQvMiEEZ03BvYOXDM7vNrZRvlQ3Nge9LGTmsQimslxzViTPHIYL5gES6U=",
|
27 |
+
"description": "store_legalDoc_verif",
|
28 |
+
"type": "text"
|
29 |
+
},
|
30 |
+
{
|
31 |
+
"key": "docType",
|
32 |
+
"value": "aGuIy/yK6V76wmGj/glbbEHn6j3CyEhmOJdqRsq9KIx8W8lcTpOLe7JlJIV7guBYBBOyno/4rmLWXxc6f5VaMIk8e698FiaVpGtxtsXT7sbXNPH1lEuCLKDVxpl4TxZGTsEVfOPQ4Gg9Nnu1VwamJvYCODuzGUKjgFQdq00DSIDAmBWRYSuYRdM6qVTk9lo6rtqjO7N8yw0iZQBIa9C4p/hDnzbrojcYhIadrXo98olMe4qrm53Qr3+B/YqAehu04Rt0hNbA/ZLKpsn9GZul7w1H26mQUaDPWApKsCvuzny523r6ejqpEDRDPajhjOLaztJCWCWy1yIXGq3SScxK3Iy7FCWiDc9frvR0G+ra0ar1h/KKoATjm4cjkvyRhHRvT583MjPWWFshka/prRU4Gaoff1v0//qAicN8wPjh4mhAehznJ3XpSU0GWwbYCmkgDbVnx6dCh43j6DYAYA24hnbRtbYLcJoYhasQfCbWeHs0BrXFe2eN7zyIRlrsHCjwILkrVlrZC1bhLEC7TcaV6GGDLhVWTx0+KCT50/yZb159xNXjksK8PCqh4W9afAu0cMtgAQT/V35V7zTpmRZRHn35dPlLYR6KtAUTxR7XCd32wLfyUPdAFYaLA/Ks5psUQ7SChRRhPh8k+kUUOA2uxI95YMyD3tjX03Emnga+0ZA=",
|
33 |
+
"description": "HKID",
|
34 |
+
"type": "text"
|
35 |
+
},
|
36 |
+
{
|
37 |
+
"key": "nameDoc",
|
38 |
+
"value": "UMsvHPUFARGpvZsFGFIjZD8VQ7ft54rizY2klq/nVrnguWfgIMFcKYFQ9b31UYj98l8f7v5s1h2rbY8thlvKoXcS60heeW1G56LKcXnJ0UFOOLJdyCY9Jbrt3gv3EkwrNb+GMaeh93dFmO5w7XSMAKtFyjAESpO6E+Kr/T/U3VQL/TctMOeGOKN3Cv4N954Kxg4mSetAJiYrPhNJnmewFKQRtawOv9Y5YKY7fhLDpb4VXpFeEA7g92KqgpSmnXa6AkJhhphkshakpgbfVeLU2y5n6YDnv9BBQua9CayqMU7rnI9rtal/SubT0G+HEdJyyzhu/ZmD1wm3BO+QWIieCyU6+GO4ymNNSugb8pPOd+l+e/ritxwCmvJCWeDB+qKTJSvkgBNLG2ICS6+SrQMVFgwmLWbD0oZO3ru4oY35g4akPIP6BkEZ++P75kTOlvKWkZe05Yy6DqFvIUaxicaooTOg1NWxAKWfEizyZ50scGP88pG4+XLW3IUGCniUA78j9Z4SkPnVUTrKR/RnJsWN4jdSubN/loatYhiJZATYjlQrftRM3NFFpmfUcm8wIErT6mBMDb8oT/n2n0YOsld1nQIXartLn7wM0egibLkuihOBGlZ+1lasnSBRSY9YMyI9msUnA9bslu+k/kga5qIp0bMp7f1rEKcaR6taS8hW5gY=",
|
39 |
+
"description": "allen lau",
|
40 |
+
"type": "text"
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"key": "docID",
|
44 |
+
"value": "Pz9QeLnCZxZzG1qb1F3FQ7Dqy0Dbra0Z7GTVgfk89nQz/K6Ui7MXuqa6WXNKc0mDYEPmpuOmVu165GYs9Ws2+L7yXrnwcg8mePs/MpwtdvCr6YYeMQexgOzOA4oFjjPMOVMP9Z70znUQeJSVGJcaazswagwp8gRIyb+fhbNjbDY9+aPGKAxzlzlzhTAp7ZihWg7XW/9kRLKrGK1EdKFEDUq1grZpszDS7SwGYME3nCah753JNU2Zt2jJn1yd5Rs+OeSgSuByR1YEboK7MyLnxTqlU0DJvg5GNs7L2AGOWZtFPjUdMKUuPw0teaAUtTC+sms5/vHghRhIzTIr6S6/5PcId3TUCC+qGphIrVf4MqR9wQoJuU4RDYMUSf2Hgod3yMPe7jcokJaug17ToukriaBVWJ7nZoBM7Wa5JkD7wge4qNeBivS/fytikcWsMK36TpZPd7cEoRVn2lmADXJEl4DpMAMUPqKILl41wjXgPvIs5YzfoBC45yEXn6DpsKyU4v7RnnpRT6PnGmKSI5h9H55jFS8vg4f3GEMoXnbRiRn/QePx4W3fBNh8Sfh4oEtnbtC5tGxVhDrPxlxSK1ySXNlGLEMAQCaCeQidKIO3gWgBdJyRhgWlmuexUh5N4jrNlnRgwbUkxhgpcrsweYlNiJjhGUOULABDFJjLSAd49Jo=",
|
45 |
+
"description": "G908833(1)",
|
46 |
+
"type": "text"
|
47 |
+
},
|
48 |
+
{
|
49 |
+
"key": "docValidity",
|
50 |
+
"value": "bp2OFdPn7t7YfIaIKIyzt3jBFjPNb0WtxyzzzV2CX/R1shy+/Klx5yEQ60Rn8HbcInh9Dopbp3K0JWB2rPvjyYhwassyVjsbGcGd7QUJntL49XYnimd4nLhCsI0eVYNDFWQk4lfzgXqH6GD7V/8xxHZdS7FnWp1zf82s3m8PwqmfTD2XartaQh5GVf2woin6YhCN/XuPMbcZCxhDXOedDz5hf095rk83jWyMnn6lsC1loeOGoVza744chQXRph3XWAXuP2m2ZG3zMxMxomGL2AmZj5tmJ4DNWCPP4Qthi6ZTlSapmb05xMX47xWsSZA3Xd5RIlbVd+Y1iNR+7Qs6oaf0qR3UYaV3BA+Fh83StZKLkkZK9bk+esuRvHQa0Q4aqvBa1S3YAF0soN+ba/UM8+AmlMs2OSzFYN7fv8VR1Xwclw1NJQqvIg6aZyFZdk2RbvmNgkedsuxtye21yawHpsXtcHy415YR8wn5aefsfb+DAwnKZ/6gZUlpCKb9iZ+9xlzXocadLInCKBiTyHYMtxj/VPngdDDZQQZcJlZTDr0ZMq2fdmcYZbPVQmX3UVaa2Zu2Uqlm85pWH1EKvgxoYSWx/FT90hZU8r9DHfjOthOJwwCgAe/mvpNDs/JzGMYUr89qOVNtDycnkJvcC3OYwvNgElSL1pipXS4KM/3W3kU=",
|
51 |
+
"description": "True",
|
52 |
+
"type": "text"
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"key": "dateOfIssue",
|
56 |
+
"value": "V1Qw8e7grhgy7kQxYTivhMYnBDixs6KOe+wE55uJ4orkrP/0sBjZWtty7bUYZ7wBzg0bujLHPPmmWRnS/UqSwhYr4D105b/J5ZY+w/Q5g9gKkWEPnWcJPtJ4ATt1KxvbCFN7AFT67+cxKaGDf0VE6HBPuNbSufOJ/55X4Vaslb/TpTiyXSSg8I29Y9vVfZ3m9vZwtLAIGgLo/HCoEuSfx8a8ntKqJOr3MPjOTN/Ml9kISDpnNqpsYnav9ZXCVfMZaTxXVIegvct2bJzVmf1gAMXry2EpDOocjugfAxY1ODEFFmXiHHIRDNL7NMy8XeU4iOQcTdzcTKlmUR6YYGk5pHOIYaBZOx56ge/EIP5+D+0Lv3R/KoY8GJllFtnK3EO21GrT6sRLWB22CV+cGhN6xkSCl+wIMN/X625p1zf2kPo0hwnOHPsCWwzoWIDUIl2K8aBBPckl190da9Yb/8SPJEZrsFzK6JgYMOGsTQ1J7+Jsn6KXV6c0Mfb3SAoyREIpZwJQKWHqQOHJVJVcZ8wA3fHAQKsUxSElDsIfxI0Jbp3WvqO9pIdAX/dAnEmtw8ajH21efx8vWlV+8GtRQ5QCLhqF7ioPMLQt2Kkl3kvs3FvbryzME1+Y3Bomz1LaS+gMa4s/dLcG+Q1pBSUfP0WYMFjXqCDMkPLZnhJycGXx7+k=",
|
57 |
+
"description": "07-81",
|
58 |
+
"type": "text"
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"key": "matchingScore",
|
62 |
+
"value": "OjG09vdihB12ZPb8C3oqVcQMhjMbBSIRjKtzDzr96QCDodSx4W9uIjn9e6zWHW7RyOXbm0hiqKae6d8ZS7hPpqyIF7Yfl1OAfnuEkQ0WfLm8yuUvCA4oFSsH86gx1EYeDUYM44aZEJ5qqPK/IUKvXsHSGgsoIZ0QniIHn71Q1K28zc79iZ0UCkkRhhitoF68JNY0Qik8hiX6ES7yiU0daq7vFIbw0Tg9JLr8fpw/81+Fm7zVfhAx5T2LT2cag04da+YLKSUIjsksQ/CCiRQSDRfdbOdZ9Os0tiXPZdYoIU/dxVerlUJLlmiMYYnVte4m/8pMlw57bc8oKE+qka83R8E4hH9Wu1uOcWHjdPYyUJdunyXByXtM5igrUHgmLvvUQ7eBfnrK5+HZlhHC9tpiTVvcWJd086lS9/hi8UPp0XgSc3h9TcQU2EiC2rgnC3PbPIdjo0Evb7M9P2T0xlA68Na3uW94hBoHzoyS1VmLFTo5alR4LteFBhZ/sCeMp0m1LYs4ZXUOCS85FRwK1x8WnomXpxOAFvBai5JwiLLqkNyBG90LEKQRFFyqk4dceBjOGk+YIq2fTWIMGNTNrtIltV7tf9GJS+LwLBDGsofieYceGeS6ekGDgnNXQJpIRTLccD6qX4FNl+W2K7g1M0xVMNeJ/B7LV3Pb/7uD+s2ASyo=",
"description": "0.957",
"type": "text"
}
]
},
"url": {
"raw": "http://ipygg-api-test-env.ap-east-1.elasticbeanstalk.com/SBT",
"protocol": "http",
"host": ["ipygg-api-test-env", "ap-east-1", "elasticbeanstalk", "com"],
"path": ["SBT"]
}
}
}
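The "value" strings in this request template are base64-encoded RSA ciphertexts of the plaintexts shown in the neighbouring "description" fields, built with the same encryption step used in request_json/sbt_request_generator.py below. Recovering them needs the private half of pubkey.pem, which is not part of this commit, so the following is only a sketch under that assumption (privkey.pem is hypothetical):

import base64
import rsa

def decrypt_value(value_b64, privkey_path='privkey.pem'):
    # privkey.pem is assumed to live on the receiving side; only pubkey.pem is committed here
    with open(privkey_path, 'rb') as f:
        priv_key = rsa.PrivateKey.load_pkcs1(f.read())
    ciphertext = base64.b64decode(value_b64)
    # e.g. the nameDoc entry above would decrypt back to 'allen lau'
    return rsa.decrypt(ciphertext, priv_key).decode('utf-8')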
request_json/sbt_request_generator.py
ADDED
@@ -0,0 +1,108 @@
import base64
import os
import rsa
from datetime import date
import secrets
import string
import requests
import json

def generate_token_id(length):
    characters = string.ascii_letters + string.digits  # + string.punctuation
    token = ''.join(secrets.choice(characters) for _ in range(length))
    return token

# Examples for what will be generated
# 5!bA9H2f1q^...
# Xe7uM$4d9@...
# &3yTb1*8Z#...
# %pWqN7!6zX...
# @9oV!s6Rd2...

def get_today_date():
    today = date.today()
    return str(today)

# Example for what will be returned
# 2023-06-29

def generate_request(data):
    url = 'http://ipygg-api-test-env.ap-east-1.elasticbeanstalk.com/SBT'

    pubkey_path = os.path.join(os.path.dirname(__file__), '..', 'pubkey.pem')

    with open(pubkey_path, 'rb') as f:
        pubKey = rsa.PublicKey.load_pkcs1(f.read())

    # encrypt every field with the RSA public key and base64-encode the result
    # (the encoded value stays a bytes object, which is why sbt_request.txt shows b'...' entries)
    for key, value in data.items():
        value_bytes = value.encode("utf-8")
        encrypted_value = rsa.encrypt(value_bytes, pubKey)
        encoded_value = base64.b64encode(encrypted_value)
        data[key] = encoded_value

    # Write the encrypted and encoded values to a file
    with open("sbt_request.txt", "w") as f:
        for key, value in data.items():
            f.write(f"{key}: {value}\n\n")

    # post the encrypted form data to the API
    r = requests.post(url, data=data)
    print(r.json())


def split_data(data):
    request_id = "request1234"
    # token_id = generate_token_id(501)
    token_id = "12344321"

    # the incoming `data` argument is replaced by the extraction results saved in data1.txt
    with open('data1.txt') as f:
        data_raw = f.read()
        data = json.loads(data_raw)

    if "avg_score" not in data.keys():
        data["avg_score"] = "0"

    legal_doc_data = {
        "endpoint": "SBT",
        "apiType": "store_legalDoc_verif",
        "requestId": "request_id_id",
        "date": get_today_date(),  # a string
        "tokenID": token_id,  # a string
        "docType": "HKID",
        "nameDoc": data["name_on_id"],  # a string; lower case with space separate; e.g. san chi nan
        "docID": data["hkid"],  # a string; with bracket (); e.g. G908833(1)
        "docValidity": data["validity"],  # a string; "True" or "False"
        "dateOfIssue": data["issue_date"],  # a string; month-year; e.g. 07-81
        "matchingScore": str(data["avg_score"])  # a string; e.g. "0.957"
    }

    bank_statement_data = {
        "endpoint": "SBT",
        "apiType": "store_statement_verif",
        "requestId": "request_id_bs",
        "date": get_today_date(),  # a string
        "tokenID": token_id,  # a string
        "bank": data["bank"],
        "nameStatement": data["name_on_bs"],
        "address": data["address"],
        "asset": str(data["asset"]),  # a string containing only numbers
        "liability": data["liabilities"],  # a string containing only numbers
        "statementDate": data["date"],  # a string
    }

    generate_request(legal_doc_data)
    generate_request(bank_statement_data)


# demo structure of the data
# {"password2": "chingfuilau", "username": "Allenlau1111", "password1": "Allen02118173", "date": "2023-03-03 00:00:00",
# "credentialId": "testing123", "requestID": "test_statements",
# "userId": "7893456",
# "endpoint": "SBT",
# "apiType": "metadata",
# 'tokenId': "500",
# "ipfsLink1": ".",
# "ipfsLink2": "..",
# "ipfsLink3": "...",
# "membershipStatus": "1"}
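generate_request encrypts every field with plain rsa.encrypt, which uses PKCS#1 v1.5 padding, so each value has to fit inside a single RSA block: the key size in bytes minus 11 bytes of padding, i.e. 501 bytes for a 4096-bit key (presumably why the commented-out generate_token_id(501) caps the token length). A small guard along those lines, as a sketch:

import rsa

def max_plaintext_bytes(pub_key):
    # largest message rsa.encrypt will accept for this key (PKCS#1 v1.5 needs at least 11 bytes of padding)
    return (pub_key.n.bit_length() + 7) // 8 - 11

def check_fits(value, pub_key):
    limit = max_plaintext_bytes(pub_key)
    if len(value.encode('utf-8')) > limit:
        raise ValueError(f'value exceeds {limit} bytes and cannot be encrypted in a single RSA block')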
requirements.txt
ADDED
@@ -0,0 +1,15 @@
cnocr==2.2.2.3
face_recognition==1.3.0
matplotlib==3.7.1
mediapipe==0.10.1
numpy==1.25.0
opencv_contrib_python==4.7.0.72
opencv_python==4.7.0.72
opencv_python_headless==4.7.0.72
pandas==2.0.2
Pillow==9.5.0
Pillow==9.5.0
Requests==2.31.0
rsa==4.9
streamlit==1.24.0
streamlit_webrtc==0.45.1
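These pins (Pillow is listed twice, a duplicate pip accepts) are what the Space installs; for a local run they would typically go into a fresh virtual environment first:

pip install -r requirements.txt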
sbt/deployment.py
ADDED
@@ -0,0 +1,3 @@
from web3 import Web3
# test userID: 1001001
#
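sbt/deployment.py is only a stub so far: the import is there but no provider or contract is wired up. If it were filled in, the first step with web3.py would be a connection check along these lines (the RPC endpoint below is a placeholder, not something defined by this commit):

from web3 import Web3

# placeholder RPC endpoint; the chain hosting the SBT contract is not specified anywhere in this commit
w3 = Web3(Web3.HTTPProvider('http://127.0.0.1:8545'))
print(w3.is_connected())  # named isConnected() on older web3.py 5.x releases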
sbt_request.txt
ADDED
@@ -0,0 +1,22 @@
endpoint: b'O5LDGUcUb6knu8lOvKTVd5EmjI/9yH6P4KraYx87dQQqJxJ2rmKUgk+qxavAJMFOWfhEuMwZsFB43Y3A+F6QoqBBXZfj0e4snOHWZKXEKeKynlmIR7c/Cy6bEK9oUrkrSOkPd5CZZE0/BswZMfh8XidW4GQTBo6bybHkBpSKYpNkS9W8GaJLWqnDEEKFI+KSS3gsE55PEOFvKQgB7s+icT+GF/2l5vOWKqzc8pHFFH9nyCF63zRLpAVe+ZEB52dAV148SJNlPNsr4ikQLOqOOPCYF35c5lbwGyZPbib0+6pc9Z4evxPUqzAAsrSmkwUr/c1Rqg+go4vnhJ3EfSjGbuMAeMJLdtLNzglYw/93rKYxG0Jc/w/4RWS98PHPGLA3GEvoD30leOfwS/yO8N0Cb7v9u3XwqIy/FNcdDOBii9GOmXCLJn6u8KvbyfhsBM4MtiqwGuPIBB/Wu9AVVqJGGT85YgrnWBnJfoMHvYlhEM3xEe5jSkeMz+G1h7vXye2wdKKc7jmt748i/8QRZ4TsFUcauU2V2OPD6eGcrjg4MKma0Nu7UKQyUJKXg9azDdn8YhMqzfGQ8vkoj3hQJNTIR7aOyXaACjqEpG+kWC4mCjPciYGowzp8AmY+QK+KSRjwpWJr5uvBCtqFH8OwUZuS5IAaqMbrBsufLdh7XIF48cc='
apiType: b'Tg6rz5RJnLaseo/MDn6Dz6ui3MG2YKQYdbwteCT9WdN++AyLXRnxGepXuR1i5oEqmTpxdZsA5mUnJBEjIh6EDJAXgd3jG/0Dy5p7TGe62Rmla3EGOkdjHRNWBSoXYOeivLxdH+lTe6x3+bzc7/cgbDjdJ0auBkid+avLZM8KtV3vGtOjHey3D8I+7foBzALse9TI71LHThKIg3Hjeq6dj0SLkkaz/DiLFS90JXa9Ip5NZ+CGtJ5qmDLfqjBpOLLZ06aXhEoLgUVwk4vf/d5DAOxsRyhPfK3wao4UwaVtj+NO3bR4/tQfCQ2CnFNvrr7GQfXhqGx1Mh1yOoyClZPCj8I+r3hYYrmofAR9PQF58qkmuuxAiBTqNlZHyQGmbRDOUaNBPy2Tdr67MrM43zmAFysT8HJBUCcLXWv0FcGKCqFv941usFuboOaoRH9ib8hRqvt757arQPwdmIxyayNkOWgWF3kQTt9iSbleWk61l06EUgdX6qOnAdEDHLJxPYGdrGHVwuRlT7Sj0RgkSviaom+0xcU/T24auDgZj/OjMCibz/QrRz1Ap9RMnteBGjQoYXiIhyBPe7p3Q/6h0PZCyCqSH/IDj4I8ar0FNtynkuXKggr/GWUNV7xkLS/xujhXuziBwTedR1LYdmth99h40CzamqmYL5cj13WtUHfmFk4='
requestId: b'Ew+AqWhPC5HmyJnAfltVVOcmDvYMRxV4dmBxlel2+AFQmnCVWY2gTz3KrTnvGan+xONkFyDRQV5Xcmm9coTRuXLqNHxQcNz7Iu/m8ipLUrUr2Kofa5/paAcD5x7I/nYgb7UNQ2qWzpXo/vfw6syY9vVK49ZhPfi2tLYY3gEqIQojQvmkhC3qzTrbQqKSQq8jWQ9crNGiDvrfHhPfPG/hdNhwhluiznhTFGJkyeh8d2lVwUtqKN8MMSinX/Jaqs7hyS3FIGILynPwgyASkKGLPFpqAmfemDPkbkzybHMThKRBcebUPdIMpll88nT73vBwslwr0bTvat4wOZQHjtVs/JDzWXnYw2zO/ljWzcl4RSNcJzKulUhEV6NInIolRQ30mTvxihJIojOT921zhlaHx6fp3FP2Qw2lHHgtAvY4L10R1Qxtmy0Xm7vx7oELrFjKZN9+w8h5ofvNqHdkE7HafF6tYiIPm0bWaWM1y1Z7+znrr0at7/DfEb+a+aUchC5xZ59zywi5cJCNGl0f1dEVQ8Ywl8+PMpb/e8PkEghl1MYXecbp++FhDbWK4Db5OL6+9n1WDSEkkaZ7wwo/SdSjcoSrIbTeVp6BSMTryMozADAWt2ETQzMPI3ts/LVBIbae1Qur8zb3tbx8H/Yeg8K4yLzho/PXDQnM1UNdR93uRps='
date: b'CuIu6PiwXFE6zH9VRTYTmj0zht4nKQtD0CMB+hflSl4NhhzEEodA0ULRu5/ah+u+Uw24vuK+n8YrlGGC1m1j8s6axbHMwjJnfMY7ZmDcDr6ITHINKpjUpD8KSunkjNReqhQCsPnoyPFTPYc6lA/oHA9qURrE3x5OeaL3Bt9iCwTqLFslHOJzI4E6hPDoLUl0qUu7WDwYKN+eq1tsvO/eqe2Eh8TlYGCPXvipXIu3pHp+7blNmeUn4wqOEIOnUUwvQPRNemKeKZWaO/j9kGMO0T3d7CjKE6MlbPJbj0EfCEXM37qJyvQ4r685CZwcC5Xa8AkiYarbHg0adcdCBHi0GECAUzKvlgEqcTolHTTgZ5orFNmV9K6NYqteXkbIgPQiroHDImcWcu9tjTRsmkuX8eCUSUlnBB+1qD1ggHIrk6MbJhAyDSNl4+PPm0E81z7mMJm/Ho/4ikQANz9Y9cvXLhYVVOkwmzaD1RW0I9rSnGtmW2elJVvgOQfi2Oh+nHTFzZs6UzuiG0F5JrbRnT3A8AxcWbiK3mwM3ne+0NKiF+rMSC6HsnDpE7KCNpg1GtGNw/ChXBQBkLBCxiBA78FkTmG+n4UUbC7FpPPPDZkR16o/CjTOVKI7691EcxxTU+jYPpxkB9Ul1kBZcA5ayyOL+wC3BiXi5CqjDcCbwmFivRk='
tokenID: b'cYLpI2tweuOUayPSzS/SIqWXPOMbpJinzXNTCBUo3Ew1taoLmb0jJQb4VKk1iRrD5CjnO9jS7CLyFJ1OLGsTNHyiN1dc/P2A5jGbCKyq5OOoKVd+F6IamfHy2YMTGjb4nDP8dMfeUlz5R0FYx3yiuQ3oY6CHYAm/TKYNLaSTu5JXpC4FrVlsVnExDzgsAlMZN52qtVNgKyaO4dC1NVzdlMMBA3Wqiu1+d77fupWl/mQTqWSw5BR2pbe4yZYEBskjW2R7d7xl22UxOOar2tY9yRYKbDB1K6M/00Lm9ApkFCXruz1K7/kbz8IPi/p79MSXpdBLQko4/Z5hxZ13+Jc6QeAqcgiENC5wjYdeaJMyYOGvCc/6XpFq4Pso3D3y3j/qytm/0S4QW4/OWwgQj/DggU9HU55I8HxP//SfHbO6CBVRwB86ym18YV8Y7z5pnK3urZBQYE3iVSfhB+dj2XOzAE07uKHNMKlouKzoBVq5voaYVvBFf2odVFDRbsoa1chJxGcr08dARLZ6lt8ZyKsXqNj+EKxVAy0bHOU+F4ZyIaQyVg8p2jUyOAH2WZwlsmJ8yg0f41ddlyHwIpurCuRa4L4wFlRblmrSH8hxjWIwKfqhz5CJSknvQGYksfzic0vv6nVRXMx8jcdq/HGNn6iiPZw4hluVPIwF8j+h4zsA/Wc='
bank: b'LbIOvIo9qYAb4jJjczhspqbR1jPArtkyKqfE2oM2LuA/d1oZTGf1VGgQyCt5T8m5UAknAZu32A1eCOkPx9B0xLW5lYuEvnRgeyvNW46FM4Fx/iFrLns4lClV2ivqJ+cWV2BV57u8p1Tj0OoVbVPSl6v0T+Pxg5YFJvNjU6+w2lRL63Q4WiKFKVCHfdKbxzkfCE3UtK+GTm9D/d28O2ql9lRBFFnsBvRCYa9MhWSx50CNv81kC1J0tELyejtVxrlnW24SVVt+S5v6kV70TVHYfmkoeh5gd8XJlGEZ8Ww30UoYsos5pgaywvgGhcKMtHF5gI4cAY9Q4xlcEpUcyFQSMB76+HmX92fqUxAfVJRCG1iUrFP+vswZ+eWvx0LCXG8Wl7tYSWzBD9NmKADiam00ODV1f/woWFp5GPr7eupUSPZhI72/I3s4u+m3bvtFFEFJKIeCAX1QcmZL6r3KKsp6PSDR4G7iCk2lbhpLW2icasYy93ziDnUn5FozgPrnjasSRxDB83yOqw619AFHMTlFeMfsFEKAz16JzBUz0Bi6PujEfKZ/QDLFK6FyNHicNaAPtpsci4PlMLSvoOVV5Uf+lIj2yBNYyMfSYda4wYJ+djk8GhguN+Cx/UWZcnTP/C8CS0wk8/iVVkSk+oAKM4esLDDgocl4AhbpuLbfB4XIQ9Y='
nameStatement: b'KoojlgYLT6dN8GI2DB3TDX8QKwZXgGYZUjX550icBht3gFn6hkoejR4dP4TivuFhaVb1muER0QTyjWog5N4yT9iWJHpfXV8j41SrFst4QcJO81yoxTY4nguLg3Ie56RYk6FQgz2dnKx2WPs/M+Hgc+d+mGlb7mjPtmD4zFVJJHyDaVeUdWWZWVVd79yIbFnNleCWxPjByfsGaDiIGdkoZApiLFknBn46q7/JR+McJXYfPajPZZK+pOExGHj8TKPS9nEuz+73POX1fz/faRWGYDtqIcpkoi1VoSKY1cN6G05NM1N2KAOmxPw4rXy5bYWhNNWreMQNH+P03QQQUiJgo94MMph5aL5G+dc/9ei/GLwZe7icMu3/O4S3y/ZTwOwt8nHu1QQukKEJxzvr11YChUGO/Hz9c3vGvxOvTzaYJhR9bSsf9tB0iVXtatsk6Ng6kOCFICatLFd8VDOxqiVzzW8gxP31AV0xsInDKbsjyOKdIvwxPwFX2+XEjWQ+YyTFuw16DzJniCPP3+eFN0VBK6PBf6OgeEJbtb1yM6hB00ny5JWoau55LIajHEV6/wp3fr81V151+0UdXzTtmmXCy+pTS0gqvc4Nm2O9Sar/iNpiwpyJqd5oxDb/RRcLExnFBGo21vh7vILAdR4ajeX7PPZhVcYeAMMvwOcqSit8G+o='
address: b'bj5oVnpsoxk2n33yVxia8QhDEI8fRxKMFsLvMy6EaQNKwolxrp1d2at4t+u0CmvkmTfCcgYd1Wr9oaJ6COmJfJNIwgy/ftZL/s+6PMzfBgapt0yDSBSDM4vtRIlIKnz4+BXCSDwwpqpCMo5J0OdEJaa4K3qf/9S1B1gf2rotmbt2xo/Ipsf7lfG2Rk+gSiGcs2bsMBBPBIqtTgf7SxXKk7RkOU58nPz+mnZEjdQgIsrQFJqII6+Esw7puTmDtrgGLokTSL7IXqtPw5V7Z/XXbqzImSuEc7wpk7ky4LU48wJ+bGe3JNosOB+c7KEdbiz4gIC2LCu8lw8kWbBaqsfXUzX3QCw+9n5g7xAw0VdrF7B/e3G8pCrmgoJis4NQl5Jy30M5pe7bUbgxwLY9HGmWzBgMGHCcpmrPO0ADpOM7QwdTPmevA482ZZTHzlny6mp3wWJAbomnuXJ8NvFnYMWIq7eImPY9mAKxjYzrX2fQ2qVr6Yr/T1bqErekvKz0V9eTrD+xu4nLyqmXe7/sD5f9yPY9PcLVAMMunBwf0x+aCsX1bqWVLPe4HG0nZgecZizH+Qx2Qv8PYWqBugqXTAVYNcS7vGnrsgoUjnWjTUTTIB/VoZCCtxcb0oDazoZbqJ+8+kv/MRsO/qdQd7nDbnU1oYg/RLfb87PXGQWYk5I0qPc='
asset: b'JAu5OeWuUZJAh7+8isnKhlUNaXKtXgmErqrUy0+YqOpY4P0nSKCT0DH7nmX1DxHn1ezZBMFkM+wcO/9a/9R4WM2CABl4nLYzuu/y20dzdl2w0MqzHla8OaIQIoBDms/XPaBiLByAVDjSm2UCt81NG25VXXn8LNMfmjvCXe8VBa0fBgZrNnPfR9YF8GRMBMcctSHiPrXZuvIHEvcHhbEgHa3zo7Gq5ApPqFrgiVwmdRkFjf5DxRvsUXPlqeUW7nKD0NLWIAUlM0L0/PnQpKLmhexYU5qgsSXngFH+vm7NBWas2AQXMKX4WNWCLqlkljIQ3vlHh+Q7lZZW2C9Y39tz0ghC8eLzygTGbnxxzT2bSk5YJMLy8Gm7toJTGhL4KoMEPO3LC/C3mnVGo9Qtdenkt634d616AILlMwWhf9SuhIPbygBegmrcxdtjdjDHbGopztn6Urcf7ai2+5dPVTcyWOiwZf6AXsGqf9TTmK/DLWVV/MSwRUPDY/HKCPwS6o2Bcm86MNJFOru4Ez7qPIArvjjgQMdYFXeO/C373oOX+EBFxLrui8Tz2t4SNzNGy+Tw9kVZDdJYUyoE5V6q0VFLmrE5J8TFKNsTz2LYIdkV7GIMITavvUqpieXfj5oJpNDA5CSTXl2OQmArLzbO10VETsizw5JPrFD5+LAGvsMLfMw='
liability: b'ND9CxqaBZFqxPImrnQKpCsY5pW74y5cRIbfxsfwlrOTnPGKXlllPXNqgwOBlJwvK/5xJA3u7Taj/EGpyRwvnH0JUwiWRWt6i+JlKD6OuRwYEhbvYZwWZPR6EFkI0C97Yusk9dJHZd6AAm84h5tPSnx1p2Z4g8IthGgrP1eLKTqK1v/cc+f2ldkKxM/iaEDemFB7LzVespjkUaE+nhfoG3wk7v1+BPSZt5TK1zwYDj1JdGP5JyuzyY6Niwyk9ThcHnuyJ8qMby7qsod1drvdEESDI0d0MrwYzVJBekhYQpqiOOYpnbkxXAlVkFyI2L7DV4+ue66MeFZfsad19FGQA9DGvCdXMAx+CvigRJyRG2DFMOZ7BgiCAwFJQAj+ayma8C0mHpeJChrt0i/ZOskEG6rjVtLPeQVvdvuV1NX1PDSpVeFD6ml/ZjN0BmIJxofbxRm/CiffJArbtT9dpVPneY5/nJuRF7JrLSx4+zSlailj7RHdxEsy8fq7x78Zl3jPhmtn7P0Gnd8+epgo+66IqBIHOSUpy2vM12qq44xaxX+bVChg6CGBp8fgwo+49/7zqWQ0/hXi08/ZwAnhcwlXBVi/U7zJsW4vTS0ZaizX6uv9oy7MMsDaqgvmqUr+XHUV2xQPGjswrpUeNhJVjMd+598+fqsGeJ8AYXkSv34EET0Q='
statementDate: b'LqkAFp9Li6qYamXMH8UUndrGhquJrtNXdMgG36EjZoFF72ire4Y0Q/S9q4HnKjOYlobVxxsxvkDdUiWxJa6uuuULMkabaozKLnDsvSfZCMDZcvEEkxrTqYU00fetcVCFieS74p+kS+a3a2Ohq2zlnHZEG6EmviiBPTP8Q3wGpQXYkJNQ8cPbum77dvYLabSUu61xUU0B6Dz5QHI5W/6NcFFgT6kB243eHoK4L3qJGaSm5cuB10iU62PY8HYoHTeKQ9Ve59qu5KPyNj9VwoD/tZebNpnveIRX68T4cmkGW3PW2FeyL27mxOsCMQrUCI5oxOFstO27a/Yfqg/CwRDnfYr+8Hm7vohB0bW7hSD6dfCFWfv4j6RdQ/T5ToF6jKmkBPiv6IGyR7LYAb/zr9drgSRxVaoR9gZMv+yq9xPJENQbDUV2P9N8ypRqS/+hNIMyx58TpnJc5st7J+hehWJT/5A2M59EjTVt1mwYLwbp9rjxevRoe5YZDxepeFCrS1gvyg8J0MJfE6J/RWogEAu9UTkgpks0s4NaXPGb/hMs4wMs9xsZooKjtH7p/sDTR8JlewVcxirl/xg1lxKELFd1qy4AVaFR2PAd8dj51t+7oZf+WCh+kODepzGgo6JOiBhAHAKuRd5lUT9J3gg5vZzaKEoGliBlEj2tNWDvVpbXUI8='
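The b'...' prefixes above are a side effect of generate_request writing the bytes object returned by base64.b64encode straight into an f-string, so the file stores each value's Python repr rather than plain base64 text. Decoding first would avoid that, for example:

import base64

payload = base64.b64encode(b'example ciphertext')
print(f"{payload}")                  # b'ZXhhbXBsZSBjaXBoZXJ0ZXh0' -- the form recorded in this file
print(f"{payload.decode('ascii')}")  # ZXhhbXBsZSBjaXBoZXJ0ZXh0    -- plain base64 text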
similarity_check.py
ADDED
@@ -0,0 +1,89 @@
from model1 import model1
from model2 import model2
import checkTool as ct
import extract_pdf as pf

# get info from hkid card

def string_similarity(s1, s2):  # Levenshtein distance algorithm
    if s1 == s2:
        return 100.0

    len1 = len(s1)
    len2 = len(s2)
    matrix = [[0] * (len2 + 1) for _ in range(len1 + 1)]

    for i in range(len1 + 1):
        matrix[i][0] = i

    for j in range(len2 + 1):
        matrix[0][j] = j

    for i in range(1, len1 + 1):
        for j in range(1, len2 + 1):
            if s1[i - 1] == s2[j - 1]:
                cost = 0
            else:
                cost = 1
            matrix[i][j] = min(matrix[i - 1][j] + 1,        # deletion
                               matrix[i][j - 1] + 1,        # insertion
                               matrix[i - 1][j - 1] + cost)  # substitution

    similarity = (1 - matrix[len1][len2] / max(len1, len2)) * 100
    return round(similarity, 1)

def get_data(img1_path, img2_path, file_name):

    # img_fp = 'IMG_4495.jpg'

    info1 = model1(img1_path)
    info2 = model2(img1_path)

    def print_info(name, valid_hkid, hkid, issuedate):
        print(f'Name: {name}')  # name is without space
        print(f'HKID: {hkid} and validity: {valid_hkid}')
        print(f'Date of issue: {issuedate}')

    cinfo = ct.combine_info(info1, info2)

    # get info from bank

    # images = r'hangseng_page-0001.jpg'
    # bank_list = ['bankofchina','hangsengbank','hsbc','sc']
    # image_path = 'hangseng_page-0001.jpg'
    # post_url = r''

    # name = pf.get_info_from_bank(img2_path)
    # name = pf.check_mr(name)
    # name = name.replace(' ', '')
    # name = name.lower()

    data = pf.get_info_from_bank(img2_path, file_name)
    name = data["name_on_bs"]


    ############# Similarity check ##############

    # img_fp = 'IMG_1234.jpg'
    name1 = cinfo[0]
    threshold = 85
    # print(f'Name in HKID: {name1}')
    # print(f'Name in bank statement: {name}')
    similarity_score = string_similarity(name, name1)
    # print(f'Similarity: {similarity_score}')
    # if (similarity_score >= threshold):  # Above threshold
    #     print('It is the same person')
    # else:  # Below threshold
    #     print('It is not the same person')
    data["similarity_score"] = similarity_score
    data["name_on_id"] = name1
    data["hkid"] = cinfo[2]
    data["validity"] = cinfo[1]
    data["issue_date"] = cinfo[3]

    return data


# path1 = 'IMG_4495.jpg'
# path2 = 'hangseng_page-0001.jpg'
# print(get_score(path1, path2))
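As a quick sanity check of the scoring: each differing character adds one edit to the Levenshtein distance, and the score is (1 - distance / longer length) * 100 rounded to one decimal. With made-up 12-character names:

from similarity_check import string_similarity

print(string_similarity('chan tai man', 'chan tai man'))  # 100.0 -- identical strings short-circuit
print(string_similarity('chan tai man', 'chan tai mun'))  # 91.7  -- one substitution out of 12 characters
print(string_similarity('chan tai man', 'chen tai mun'))  # 83.3  -- two substitutions, below the 85 threshold used above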
test.py
ADDED
@@ -0,0 +1,3 @@
import streamlit

print(streamlit.__version__)
text_reader_v2.py
ADDED
@@ -0,0 +1,18 @@
from model1 import model1
from model2 import model2
import checkTool

def textreader(path):
    info1 = model1(path)
    info2 = model2(path)

    def print_info(name, valid_hkid, hkid, issuedate):
        print(f'Name: {name}')  # name is without space
        print(f'HKID: {hkid} and validity: {valid_hkid}')
        print(f'Date of issue: {issuedate}')

    cinfo = checkTool.combine_info(info1, info2)

    return cinfo[0]

# print_info(*cinfo)
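textreader runs both OCR models over the same image and returns only the first field of the combined result (the name), so a typical call is just:

from text_reader_v2 import textreader

# 'IMG_4495.jpg' is the sample HKID photo referenced in comments elsewhere in this commit
name = textreader('IMG_4495.jpg')
print(name)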
webapp.py
ADDED
@@ -0,0 +1,209 @@
import streamlit as st
import similarity_check as sc
import cv2
from PIL import Image
import numpy as np
import tempfile
from streamlit_webrtc import VideoTransformerBase, webrtc_streamer
import demo
import time
import requests
import json
import request_json.sbt_request_generator as sbt

global data
data = {}


def main():
    # st.title("SBT Web Application")
    # today's date = get_today_date

    # global data
    html_temp = """
    <body style="background-color:red;">
    <div style="background-color:teal ;padding:10px">
    <h2 style="color:white;text-align:center;">SBT Web Application</h2>
    </div>
    </body>
    """
    st.markdown(html_temp, unsafe_allow_html=True)

    st.header("I. Similarity Check")
    image_file = st.file_uploader("Upload Image", type=['jpg', 'png', 'jpeg'], accept_multiple_files=True)
    if len(image_file) == 1:
        # print(image_file[0].name)
        image1 = Image.open(image_file[0])
        st.text("HKID card")
        st.image(image1)
    elif len(image_file) == 2:
        image1 = Image.open(image_file[0])
        st.text("HKID card")
        st.image(image1)
        image2 = Image.open(image_file[1])
        file_name = image_file[1].name
        st.text("Bank statement")
        st.image(image2)

    # if image_file2 is not None:
    #     image2 = Image.open(image_file)
    #     st.text("Bank statement")
    #     st.image(image2)

    # path1 = 'IMG_4495.jpg'
    # path2 = 'hangseng_page-0001.jpg'
    # image1 = save_image(image1)
    # image2 = save_image(image2)

    data = {}
    if st.button("Recognise"):
        with st.spinner('Wait for it...'):
            # global data
            data = sc.get_data(image1, image2, file_name)

            with open('data1.txt', 'w') as f:
                f.write(json.dumps(data))
            # data.update(sc.get_data(image1, image2, file_name))
            print(f'data inside {data}')
            # sbt.split_data(data)
        st.success('Done!')
        score = data["similarity_score"]
        # print(score)
        st.text(f'score: {score}')
        if score > 85:
            st.text('matched')
        else:
            st.text('unmatched')

        st.header("IIa. HKID Data Extraction")
        st.text(f'Name: {data["name_on_id"]}')  # name is without space
        st.text(f'HKID: {data["hkid"]} and validity: {data["validity"]}')
        st.text(f'Date of issue: {data["issue_date"]}')

        st.header("IIb. Bank Statement Data Extraction")
        # st.write('------------From bank statement------------')
        st.text(f'Name: {data["name_on_bs"]}')
        st.text(f'Address: {data["address"]}')
        st.text(f'Bank: {data["bank"]}')
        st.text(f'Date: {data["date"]}')
        st.text(f'Asset: {data["asset"]} hkd')
        st.text(f'Liabilities: {data["liabilities"]} hkd')
    # result_img = detect_faces(our_image)
    # st.image(result_img)
    # print(f'data outside 1 {data}')

    st.header("II. Facial Recognition")
    run = st.checkbox('Run')

    # webrtc_streamer(key="example")
    # 1. Web Rtc
    # webrtc_streamer(key="jhv", video_frame_callback=video_frame_callback)


    # # init the camera
    face_locations = []
    # face_encodings = []
    face_names = []
    process_this_frame = True

    score = []

    faces = 0

    FRAME_WINDOW = st.image([])
    camera = cv2.VideoCapture(0)

    while run:

        # Capture frame-by-frame
        # Grab a single frame of video
        ret, frame = camera.read()

        result, process_this_frame, face_locations, faces, face_names, score = demo.process_frame(frame, process_this_frame, face_locations, faces, face_names, score)
        # Display the resulting image
        FRAME_WINDOW.image(result)

        print(score)
        if len(score) > 20:
            avg_score = sum(score) / len(score)
            st.write(f'{avg_score}')
            # read back the saved extraction results, add the face-matching score, and write them out again
            with open('data1.txt') as f:
                data_raw = f.read()
                data = json.loads(data_raw)
            data['avg_score'] = str(avg_score)
            with open('data1.txt', 'w') as f:
                f.write(json.dumps(data))


            # update_text(f'{demo.convert_distance_to_percentage(score, 0.45)}')
    else:
        st.write('Stopped')


    # print(f'the data is {data}')

    # st.header("IIIa. HKID Data Extraction")
    # st.text(f'Name: {data["name_on_id"]}')  # name is without space
    # st.text(f'HKID: {data["hkid"]} and validity: {data["validity"]}')
    # st.text(f'Date of issue: {data["issue_date"]}')

    # st.header("IIIb. Bank Statement Data Extraction")
    # # st.write('------------From bank statement------------')
    # st.text(f'Name: {data["name_on_bs"]}')
    # st.text(f'Address: {data["address"]}')
    # st.text(f'Bank: {data["bank"]}')
    # st.text(f'Date: {data["date"]}')
    # st.text(f'Asset: {data["asset"]} hkd')
    # st.text(f'Liabilities: {data["liabilities"]} hkd')

    # print(f'data outside 2 {data}')
    if st.button("Confirm"):
        # print(f'data outside 3 {data}')
        with st.spinner('Sending data...'):
            sbt.split_data(data)
        st.success('Done!')

if __name__ == '__main__':
    main()



# def save_image(image):
#     try:
#         temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.jpg')
#         Image.save(temp_file.name)
#         return temp_file.name
#     except IOError:
#         print("Unable to save image to temporary file")
#         return None

# json_file = 'request json\request_legalDocument.json'
# file = open(json_file, 'r')
# data = json.load(file)
# file.close()
# # Update data
# data.update(new_data)
# file = open(json_file, 'w')
# for item in data['request']['body']['formdata']:
#     if item["key"] == "requestId":
#         item["value"] = ""
#     elif item["key"] == "userId":
#         item["value"] = generate_token_id(2048)
#     elif item["key"] == "endpoint":
#         item["value"] = ""
#     elif item["key"] == "apiType":
#         item["value"] = ""
#     elif item["key"] == "docType":
#         item["value"] = "HKID"
#     elif item["key"] == "nameDoc":
#         item["value"] = new_data["name_on_id"]
#     elif item["key"] == "docID":
#         item["value"] = new_data["name_on_id"]
#     elif item["key"] == "docValidity":
#         item["value"] = new_data["validity"]
#     elif item["key"] == "dateOfIssue":
#         item["value"] = new_data["date_issue"]
#     elif item["key"] == "matchingScore":
#         item["value"] = new_data["similarity_score"]
# json.dump(data, file)
# file.close()
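webapp.py builds the whole flow on Streamlit and grabs the local camera through cv2.VideoCapture(0), so it is meant to be launched with the Streamlit CLI on a machine with a webcam rather than executed directly with python:

streamlit run webapp.py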