PriyankaSatish committed on
Commit
d948051
1 Parent(s): 01b34ba

Upload 3 files

Browse files
gcv-new-project-dd6ed833cc91.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "service_account",
3
+ "project_id": "gcv-new-project",
4
+ "private_key_id": "dd6ed833cc911ceb9612c5c9a14961cbf639ee69",
5
+ "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDQnjL10mFLtfck\ngMSbuxMROZ+uh4v6DXTcDvWObBG2NqzzQ25aujqk68B4lFcLzMVsCmv1opbYdOtZ\nRf1T0atgjMJHA/CsCZJm0q/DBhZUyMhCSg0hEHkzbOSbIDvRpjwgV/k0ki0FTF/r\nPaPIEutw/z30uCH5plPToYFh+Ulh+DHGmO+fz0nJ/0noRtXQkrnGmt/K5+yQrU3G\nJyCWAZvdOyWw1pcWpPm2GU0CcypDJ0xi804wTD/jB8HdCzmycuZmBX4r9BSbc48t\nxupZk7MVadp1a7rygwva54NEPsEpW29wDRZFqjQBXEKc8golB4zsyBcVrx/NfugJ\nVC43iEUNAgMBAAECggEAHef/4DOXlRXx5v66sKSB2D9Nf+SYkXEqZn2SgCdH5rox\n0vHAuSrnS0ssnVyOmfdx+coIrf7v5vnj3zlkVobBLacgFrQe5Vq97RWY8rlFrEPY\nSYB6H1pQNaoPn5lgWe6dwfk6lWm7IH5RZdTBTOZBcEJ27EUVQcexTq5hcN0ewvdi\nUQB8KVAP+ucuAY69ib9DQ+B7z86VYrQkPpM/9Mf/uDCA7U4MZ6JETSCFbaQrbvb1\nWDHGupXIL+FkUw3SxslvhDv1WtCqSqOidAF3R0hT512AtBXV1bvb1HhpVzQgvRMB\nUvEeTOyMGsZFzyaUMsNAtKgZJHh9nQpJ4gaNsd2cQQKBgQDn/vaUrSetzExnZO8U\nE/p6m8svtn/xSLLaBxGir97xr7lkVrhULB67ikC/RHRJMyvzjUaEg/Rbxa3Tv21J\ns9+pMlQrgrOl4c3ART10QWUix33hUb79EpntMjRJTzqVXlTuTCmhNmQj4rboAnqG\nd7Om++WQsEEHy2r0HplseZy6/QKBgQDmNAHNbKrl/7zxJxYO1ja5UZDHiGR2FR5C\nFH6hIWb2k9ilzxjK1yjdJ99Zxlg5ndO4Hl7p3wL9p934kSWO3TCeTCYfDv9HQtGf\nEdkS469H2+AlPRw3tu/qoe+WSR6ogQ14H1NxMHB1/kOt5AAFICQJebt7uf4ErKgM\n+hzw3tP3UQKBgQDFTuj0RdgrTkfk38kd8Gflgz7p03M5CLVkDQMItD8Omn84QTEA\nFY4Fvm2WvmeQIf9NYmgJKBjZPGS8ZGBUgLDGO9GuN7kGaecNkVXU3BFh5PzAdz/S\nCNH1E++4MbQHeXUOPqRhQdslUhpxdDo0xvV6HV5/Eggc0vqhqKyfv0Z3GQKBgQCW\nWv+WYdSZlticlc/lbuqdTfYHLUGYkqCJvoMa0QtEIDrPyZ1C1xdco29RXqg/MaOY\nTVXm4P8+F8d2U685SaU4rzny7UO1EqKBiBHFMatJfwY+rFRi23yGPrCS8z0wB+J5\nQ9SnSGEb4C/qQtH0hxKikbzvygJsoSy+FRqUBKZjkQKBgCUxqy2EDIkwjPqpHvXO\n80RXmmy5mpNpTP9wtXefundge1Yy5MeCRs8PFTWvvmruodVyKckOa+B09PwcO5Ao\ndWSyH6DPC2C64Rs8ozUXwqDNVoUTvTZiPHwZ3B1ZLONJubysO8NNxh9RrY5g/W6S\novbyqpAzPMuBR6bZ+km835hZ\n-----END PRIVATE KEY-----\n",
6
+ "client_email": "gcv-new@gcv-new-project.iam.gserviceaccount.com",
7
+ "client_id": "110270416071585635646",
8
+ "auth_uri": "https://accounts.google.com/o/oauth2/auth",
9
+ "token_uri": "https://oauth2.googleapis.com/token",
10
+ "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
11
+ "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/gcv-new%40gcv-new-project.iam.gserviceaccount.com",
12
+ "universe_domain": "googleapis.com"
13
+ }
requirements2.txt ADDED
Binary file (3.93 kB). View file
 
seg_final.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from google.cloud import vision
2
+ import streamlit as st
3
+ from google.oauth2 import service_account
4
+ import os
5
+ import io
6
+ import re
7
+ import pandas as pd
8
+ from PIL import Image
9
+
10
# Resolve the service-account key used by the Vision client.
# A GOOGLE_APPLICATION_CREDENTIALS value supplied by the environment
# (deployment secret, local shell) takes precedence; the bundled file name
# is only a fallback for local runs.
# SECURITY: a service-account key file must never be committed to a
# repository. Any key that has been published has to be revoked and rotated.
service_account_path = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
if not service_account_path:
    service_account_path = r'gcv-new-project-dd6ed833cc91.json'
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = service_account_path

# The client picks the credentials up from GOOGLE_APPLICATION_CREDENTIALS.
client = vision.ImageAnnotatorClient()
15
+
16
def extract_text_by_column(image_bytes, column, width):
    """
    Extract the text found in one vertical third of an image.

    The image is cropped to the requested column over its full height,
    re-encoded as PNG and sent to Cloud Vision text detection through the
    module-level ``client``.

    :param image_bytes: Bytes of the image file
    :param column: Column to extract text from (1 for left, 2 for middle, 3 for right)
    :param width: The width of the image, used to calculate the bounding box
    :return: Full text detected in the column, or '' when nothing was detected
    :raises ValueError: If ``column`` is not 1, 2 or 3
    :raises RuntimeError: If the Vision API reports an error for the request
    """
    # Reject out-of-range columns up front: they would otherwise produce a
    # crop box outside the image and a meaningless OCR request.
    if column not in (1, 2, 3):
        raise ValueError(f"column must be 1, 2 or 3, got {column!r}")

    # Convert bytes data to an image
    image = Image.open(io.BytesIO(image_bytes))

    # Horizontal bounds of the requested third
    left = (column - 1) * width // 3
    right = column * width // 3

    # Crop the image to the specified column (full height)
    column_img = image.crop((left, 0, right, image.height))

    # PNG cannot store every Pillow mode (e.g. CMYK from some JPEG uploads);
    # convert those to RGB so the re-encode below does not fail.
    if column_img.mode not in ("1", "L", "LA", "I", "I;16", "I;16B", "P", "RGB", "RGBA"):
        column_img = column_img.convert("RGB")

    # Convert the cropped image to PNG bytes
    img_byte_arr = io.BytesIO()
    column_img.save(img_byte_arr, format='PNG')

    # Perform text detection on the cropped image
    response = client.text_detection(
        image=vision.Image(content=img_byte_arr.getvalue())
    )

    # Surface API failures instead of reporting them as an empty column.
    if response.error.message:
        raise RuntimeError(
            f"Vision API text detection failed: {response.error.message}"
        )

    # The first annotation holds the full detected text
    texts = response.text_annotations
    return texts[0].description if texts else ''
47
+
48
+
49
def normalize_and_extract(text):
    """
    Parse OCR text into a list of voter records.

    The words "Photo" and "Available" are removed, the text is split into
    entries on blank lines, and each entry is searched for the fields below.
    Only fields that are found appear in a record; entries with no field at
    all are skipped.

    :param text: Raw text detected in one column of the image
    :return: List of dicts with any of the keys 'Name', 'Relation Name',
             'House Number', 'Age', 'Gender' (always inserted in that order)
    """
    # "<Husband|Father|Mother|Other> Name: value" -- the relative's name
    relation_re = re.compile(r"(Husband|Father|Mother|Other)\s*Name\s*:\s*([^\n-]+)")
    name_re = re.compile(r"Name\s*:\s*([^\n-]+)")
    other_patterns = {
        'House Number': re.compile(r"House Number\s*:\s*([^\n]+)"),
        'Age': re.compile(r"Age\s*:\s*(\d+)"),
        'Gender': re.compile(r"Gender\s*:\s*(Female|Male)"),
    }

    # Normalize text to remove extraneous words and characters
    normalized_text = re.sub(r"Photo|Available", "", text)

    voter_info_list = []
    for entry in normalized_text.split('\n\n'):  # Split entries by double newlines
        voter_info = {}

        # The plain "Name:" pattern also matches inside "Father Name: ...".
        # Search for the voter's own name with the relation fields removed,
        # so a missing or later-placed own name never yields the relative's.
        own_name = name_re.search(relation_re.sub("", entry))
        if own_name:
            voter_info['Name'] = own_name.group(1).strip()

        # All relation kinds are stored under the single key 'Relation Name'
        relation = relation_re.search(entry)
        if relation:
            voter_info['Relation Name'] = relation.group(2).strip()

        for key, pattern in other_patterns.items():
            match = pattern.search(entry)
            if match:
                voter_info[key] = match.group(1).strip()

        if voter_info:  # Only add non-empty records
            voter_info_list.append(voter_info)

    return voter_info_list
78
+
79
# ---- Streamlit page: upload an image, OCR its three columns, show a table ----
st.title('Voter Information Extraction')

uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])
if uploaded_file is not None:
    # Raw bytes of the upload; decoded once here for preview and sizing
    bytes_data = uploaded_file.getvalue()
    image = Image.open(io.BytesIO(bytes_data))
    width = image.width  # total width, split into thirds by the extractor

    # Preview of what was uploaded
    st.image(image, caption='Uploaded Image.', use_column_width=True)

    # Processing only starts once the user asks for it
    if st.button('Extract Text'):
        # OCR the left, middle and right thirds, in that order
        left_column_text, middle_column_text, right_column_text = (
            extract_text_by_column(bytes_data, column, width)
            for column in (1, 2, 3)
        )

        # Turn each column's text into voter records
        left_voter_info, middle_voter_info, right_voter_info = map(
            normalize_and_extract,
            (left_column_text, middle_column_text, right_column_text),
        )

        # One table for all columns, left to right
        all_voter_info = [*left_voter_info, *middle_voter_info, *right_voter_info]
        voter_df = pd.DataFrame(all_voter_info)

        # Show the table in the app
        st.dataframe(voter_df)

        # Offer the same table as a CSV download
        st.download_button(
            label="Download data as CSV",
            data=voter_df.to_csv(index=False),
            file_name='voter_info.csv',
            mime='text/csv',
        )