Spaces:

Insightly2
/

voterlist_conversion

Sleeping

App Files Files Community

PriyankaSatish committed on Apr 5

Commit

d948051

•

1 Parent(s): 01b34ba

Upload 3 files

Browse files

Files changed (3) hide show

gcv-new-project-dd6ed833cc91.json +13 -0
requirements2.txt +0 -0
seg_final.py +111 -0

gcv-new-project-dd6ed833cc91.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "type": "service_account",
+  "project_id": "gcv-new-project",
+  "private_key_id": "dd6ed833cc911ceb9612c5c9a14961cbf639ee69",
+  "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDQnjL10mFLtfck\ngMSbuxMROZ+uh4v6DXTcDvWObBG2NqzzQ25aujqk68B4lFcLzMVsCmv1opbYdOtZ\nRf1T0atgjMJHA/CsCZJm0q/DBhZUyMhCSg0hEHkzbOSbIDvRpjwgV/k0ki0FTF/r\nPaPIEutw/z30uCH5plPToYFh+Ulh+DHGmO+fz0nJ/0noRtXQkrnGmt/K5+yQrU3G\nJyCWAZvdOyWw1pcWpPm2GU0CcypDJ0xi804wTD/jB8HdCzmycuZmBX4r9BSbc48t\nxupZk7MVadp1a7rygwva54NEPsEpW29wDRZFqjQBXEKc8golB4zsyBcVrx/NfugJ\nVC43iEUNAgMBAAECggEAHef/4DOXlRXx5v66sKSB2D9Nf+SYkXEqZn2SgCdH5rox\n0vHAuSrnS0ssnVyOmfdx+coIrf7v5vnj3zlkVobBLacgFrQe5Vq97RWY8rlFrEPY\nSYB6H1pQNaoPn5lgWe6dwfk6lWm7IH5RZdTBTOZBcEJ27EUVQcexTq5hcN0ewvdi\nUQB8KVAP+ucuAY69ib9DQ+B7z86VYrQkPpM/9Mf/uDCA7U4MZ6JETSCFbaQrbvb1\nWDHGupXIL+FkUw3SxslvhDv1WtCqSqOidAF3R0hT512AtBXV1bvb1HhpVzQgvRMB\nUvEeTOyMGsZFzyaUMsNAtKgZJHh9nQpJ4gaNsd2cQQKBgQDn/vaUrSetzExnZO8U\nE/p6m8svtn/xSLLaBxGir97xr7lkVrhULB67ikC/RHRJMyvzjUaEg/Rbxa3Tv21J\ns9+pMlQrgrOl4c3ART10QWUix33hUb79EpntMjRJTzqVXlTuTCmhNmQj4rboAnqG\nd7Om++WQsEEHy2r0HplseZy6/QKBgQDmNAHNbKrl/7zxJxYO1ja5UZDHiGR2FR5C\nFH6hIWb2k9ilzxjK1yjdJ99Zxlg5ndO4Hl7p3wL9p934kSWO3TCeTCYfDv9HQtGf\nEdkS469H2+AlPRw3tu/qoe+WSR6ogQ14H1NxMHB1/kOt5AAFICQJebt7uf4ErKgM\n+hzw3tP3UQKBgQDFTuj0RdgrTkfk38kd8Gflgz7p03M5CLVkDQMItD8Omn84QTEA\nFY4Fvm2WvmeQIf9NYmgJKBjZPGS8ZGBUgLDGO9GuN7kGaecNkVXU3BFh5PzAdz/S\nCNH1E++4MbQHeXUOPqRhQdslUhpxdDo0xvV6HV5/Eggc0vqhqKyfv0Z3GQKBgQCW\nWv+WYdSZlticlc/lbuqdTfYHLUGYkqCJvoMa0QtEIDrPyZ1C1xdco29RXqg/MaOY\nTVXm4P8+F8d2U685SaU4rzny7UO1EqKBiBHFMatJfwY+rFRi23yGPrCS8z0wB+J5\nQ9SnSGEb4C/qQtH0hxKikbzvygJsoSy+FRqUBKZjkQKBgCUxqy2EDIkwjPqpHvXO\n80RXmmy5mpNpTP9wtXefundge1Yy5MeCRs8PFTWvvmruodVyKckOa+B09PwcO5Ao\ndWSyH6DPC2C64Rs8ozUXwqDNVoUTvTZiPHwZ3B1ZLONJubysO8NNxh9RrY5g/W6S\novbyqpAzPMuBR6bZ+km835hZ\n-----END PRIVATE KEY-----\n",
+  "client_email": "gcv-new@gcv-new-project.iam.gserviceaccount.com",
+  "client_id": "110270416071585635646",
+  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+  "token_uri": "https://oauth2.googleapis.com/token",
+  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/gcv-new%40gcv-new-project.iam.gserviceaccount.com",
+  "universe_domain": "googleapis.com"
+}

requirements2.txt ADDED Viewed

Binary file (3.93 kB). View file

seg_final.py ADDED Viewed

	@@ -0,0 +1,111 @@

+from google.cloud import vision
+import streamlit as st
+from google.oauth2 import service_account
+import os
+import io
+import re
+import pandas as pd
+from PIL import Image
# Resolve the service-account key path. A GOOGLE_APPLICATION_CREDENTIALS value
# supplied by the environment takes precedence; the key file that sits next to
# this script is only a fallback, so a deployment can inject its own
# credentials without editing the code.
# NOTE(security): the fallback key file is committed together with this
# script. A key that has been published should be revoked and the replacement
# supplied out-of-band (environment variable / secret store).
service_account_path = (
    os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    or r'gcv-new-project-dd6ed833cc91.json'
)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = service_account_path
# No explicit credentials are passed, so the client relies on the environment
# variable set above.
client = vision.ImageAnnotatorClient()
def extract_text_by_column(image_bytes, column, width):
    """
    Extracts text from one of three equal-width vertical strips of an image.

    The image is cropped to the requested strip, re-encoded as PNG and sent to
    the module-level Vision ``client`` for text detection.

    :param image_bytes: Bytes of the image file
    :param column: Column to extract text from (1 for left, 2 for middle, 3 for right)
    :param width: The width of the image to calculate the bounding box
    :return: Description of the first text annotation, or '' when the API
        returned no annotations
    :raises RuntimeError: If the Vision response carries an error message
    """
    # Modes Pillow can write to PNG directly; anything else (e.g. the CMYK
    # mode of some JPEG uploads) must be converted before saving.
    png_modes = ("1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA")

    # The context manager releases the decoder as soon as the strip is encoded.
    with Image.open(io.BytesIO(image_bytes)) as image:
        # Integer bounds of the requested third of the image.
        left = (column - 1) * width // 3
        right = column * width // 3
        column_img = image.crop((left, 0, right, image.height))
        if column_img.mode not in png_modes:
            column_img = column_img.convert("RGB")
        png_buffer = io.BytesIO()
        column_img.save(png_buffer, format='PNG')

    # Perform text detection on the cropped strip only.
    response = client.text_detection(image=vision.Image(content=png_buffer.getvalue()))
    # Surface API failures instead of reporting them as "no text found".
    if response.error.message:
        raise RuntimeError(f"Vision API error: {response.error.message}")

    texts = response.text_annotations
    # The first annotation is used as the full text of the strip.
    return texts[0].description if texts else ''
# Relation word that may precede a "Name" label, optionally in possessive
# form ("Father Name", "Father's Name", "Husbands Name").
_RELATION_LABEL = r"(Husband|Father|Mother|Other)(?:['\u2019]?s)?\s*"

# Every "Name :" label; group 1 is set only when a relation word precedes it,
# which is how a voter's own name is told apart from a relative's name.
_NAME_LABEL = re.compile(r"(?:" + _RELATION_LABEL + r")?Name\s*:")

# Value patterns; a name stops at a newline or a hyphen.
_NAME_VALUE = re.compile(r"Name\s*:\s*([^\n-]+)")
_RELATION_VALUE = re.compile(_RELATION_LABEL + r"Name\s*:\s*([^\n-]+)")
_SIMPLE_FIELDS = {
    'House Number': re.compile(r"House Number\s*:\s*([^\n]+)"),
    'Age': re.compile(r"Age\s*:\s*(\d+)"),
    'Gender': re.compile(r"Gender\s*:\s*(Female|Male)"),
}

# Words removed from the text before parsing.
_NOISE = re.compile(r"Photo|Available")


def _voter_name_starts(entry):
    """Return offsets of "Name :" labels that are not preceded by a relation word."""
    return [m.start() for m in _NAME_LABEL.finditer(entry) if m.group(1) is None]


def _split_voter_records(entry):
    """Split an entry at the second and later voter names so no voter is dropped."""
    starts = _voter_name_starts(entry)
    if len(starts) < 2:
        return [entry]
    # Text before the first voter name stays with the first record.
    bounds = [0] + starts[1:] + [len(entry)]
    return [entry[begin:end] for begin, end in zip(bounds, bounds[1:])]


def _extract_record(segment):
    """Build one voter dict from the fields found in a single-voter text segment."""
    record = {}
    for start in _voter_name_starts(segment):
        name = _NAME_VALUE.match(segment, start)
        if name:
            record['Name'] = name.group(1).strip()
            break
    relation = _RELATION_VALUE.search(segment)
    if relation:
        # Every relation kind is reported under the same 'Relation Name' key.
        record['Relation Name'] = relation.group(2).strip()
    for key, pattern in _SIMPLE_FIELDS.items():
        found = pattern.search(segment)
        if found:
            record[key] = found.group(1).strip()
    return record


def normalize_and_extract(text):
    """
    Parses OCR text of a voter list into one dict per voter.

    The words "Photo" and "Available" are removed, the text is split into
    entries on blank lines, and an entry that still contains several voter
    "Name :" fields is split again at each of them. Each resulting segment is
    searched for Name, Relation Name, House Number, Age and Gender.

    :param text: OCR text to parse; None or '' yields an empty list
    :return: List of dicts holding only the fields that were found; segments
        with no recognised field are skipped
    """
    if not text:
        return []
    normalized_text = _NOISE.sub("", text)
    voter_info_list = []
    for entry in normalized_text.split('\n\n'):  # Entries are separated by blank lines
        for segment in _split_voter_records(entry):
            voter_info = _extract_record(segment)
            if voter_info:  # Only add non-empty records
                voter_info_list.append(voter_info)
    return voter_info_list
# Streamlit page: upload a voter-list image, OCR it in three vertical strips
# and offer the parsed records as a table and a CSV download.
st.title('Voter Information Extraction')
uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])
if uploaded_file is not None:
    # Keep the raw bytes: they are handed to the extraction helper once per column.
    bytes_data = uploaded_file.getvalue()
    image = Image.open(io.BytesIO(bytes_data))
    # The full image width is what the helper divides into three columns.
    width = image.width
    # Show the upload back to the user before any processing.
    st.image(image, caption='Uploaded Image.', use_column_width=True)
    # Processing starts only when the user presses the button.
    if st.button('Extract Text'):
        # OCR the left, middle and right columns, in that order.
        column_texts = [
            extract_text_by_column(bytes_data, column, width)
            for column in (1, 2, 3)
        ]
        # Parse each column and collect every record into a single list.
        all_voter_info = []
        for column_text in column_texts:
            all_voter_info.extend(normalize_and_extract(column_text))
        voter_df = pd.DataFrame(all_voter_info)
        # Render the table in the app.
        st.dataframe(voter_df)
        # Offer the same data as a CSV download.
        st.download_button(
            label="Download data as CSV",
            data=voter_df.to_csv(index=False),
            file_name='voter_info.csv',
            mime='text/csv',
        )