PriyankaSatish's picture
Rename seg_final.py to app.py
a516b24 verified
raw
history blame contribute delete
No virus
4.32 kB
from google.cloud import vision
import streamlit as st
from google.oauth2 import service_account
import os
import io
import re
import pandas as pd
from PIL import Image
# Resolve the service-account key used by Google Cloud Vision.
# A path already supplied through GOOGLE_APPLICATION_CREDENTIALS (for example
# by the deployment environment) takes precedence; the hard-coded relative
# key-file name is only the fallback. Previously the environment value was
# read and then unconditionally overwritten.
service_account_path = (
    os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
    or r'gcv-new-project-dd6ed833cc91.json'
)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = service_account_path
# Module-level client shared by every OCR request made by this app.
client = vision.ImageAnnotatorClient()
def extract_text_by_column(image_bytes, column, width):
    """
    Extracts text from a specified column (left, middle, right) of an image.

    The image is treated as three equal-width vertical strips. The requested
    strip is cropped over the full image height, re-encoded as PNG and sent to
    the Vision text-detection endpoint.

    :param image_bytes: Bytes of the image file
    :param column: Column to extract text from (1 for left, 2 for middle, 3 for right)
    :param width: The width of the image to calculate the bounding box
    :return: Extracted text from the specified column, or '' if no text was detected
    :raises RuntimeError: If the Vision API reports an error for the request
    """
    # Define the bounding box for the column
    left = (column - 1) * width // 3
    right = column * width // 3

    img_byte_arr = io.BytesIO()
    # Keep crop and save inside the context manager so the source image is
    # still open while its pixels are read, and is released afterwards.
    with Image.open(io.BytesIO(image_bytes)) as source_img:
        column_img = source_img.crop((left, 0, right, source_img.height))
        # PNG cannot store CMYK data (common in print-oriented JPEGs);
        # saving such a crop would raise, so convert it first.
        if column_img.mode == "CMYK":
            column_img = column_img.convert("RGB")
        column_img.save(img_byte_arr, format='PNG')

    # Perform text detection on the cropped image
    image = vision.Image(content=img_byte_arr.getvalue())
    response = client.text_detection(image=image)

    # Surface API failures instead of reporting them as "no text found".
    if response.error.message:
        raise RuntimeError(
            f"Vision API text detection failed: {response.error.message}"
        )

    texts = response.text_annotations
    # The first annotation holds the full detected text
    return texts[0].description if texts else ''
# Words that, directly before "Name", mark a relative's name label
# (e.g. "Father Name:") rather than the voter's own "Name:" label.
_RELATION_LABELS = ("Husband", "Father", "Mother", "Other")

# Pattern for the voter's own name; also matches the tail of a relation label,
# which _find_voter_name filters out.
_NAME_PATTERN = re.compile(r"Name\s*:\s*([^\n-]+)")

# Remaining fields: output key -> (compiled pattern, capture group with the value).
_FIELD_PATTERNS = {
    'Relation Name': (
        re.compile(r"(Husband|Father|Mother|Other)\s*Name\s*:\s*([^\n-]+)"),
        2,
    ),
    'House Number': (re.compile(r"House Number\s*:\s*([^\n]+)"), 1),
    'Age': (re.compile(r"Age\s*:\s*(\d+)"), 1),
    'Gender': (re.compile(r"Gender\s*:\s*(Female|Male)"), 1),
}


def _find_voter_name(entry):
    """Return the voter's own name found in *entry*, or None if there is none."""
    for match in _NAME_PATTERN.finditer(entry):
        # Skip "Name:" when it is the tail of "Father Name:" etc. on the same
        # line; otherwise the relative's name would be reported as the voter's.
        preceding = entry[:match.start()].rstrip(" \t")
        if not preceding.endswith(_RELATION_LABELS):
            return match.group(1).strip()
    return None


def normalize_and_extract(text):
    """
    Parses OCR text into a list of voter records.

    The words "Photo" and "Available" are removed, the text is split into
    entries on blank lines ('\\n\\n'), and each entry is searched for the
    fields 'Name', 'Relation Name', 'House Number', 'Age' and 'Gender'.
    Only the first occurrence of each field in an entry is used, and values
    are stored as stripped strings.

    :param text: Raw OCR text; None or '' yields an empty list
    :return: List of dicts, one per entry in which at least one field was
        found; a dict contains only the keys that were found
    """
    if not text:
        return []

    # Normalize text to remove extraneous words
    normalized_text = re.sub(r"Photo|Available", "", text)

    voter_info_list = []
    for entry in normalized_text.split('\n\n'):  # Entries are separated by double newlines
        voter_info = {}

        name = _find_voter_name(entry)
        if name is not None:
            voter_info['Name'] = name

        for key, (pattern, group) in _FIELD_PATTERNS.items():
            match = pattern.search(entry)
            if match:
                voter_info[key] = match.group(group).strip()

        if voter_info:  # Only add non-empty records
            voter_info_list.append(voter_info)
    return voter_info_list
# Streamlit page: upload a scanned page, OCR its three columns on demand,
# then show the parsed records and offer them as a CSV download.
st.title('Voter Information Extraction')
uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])

if uploaded_file is not None:
    # Raw bytes of the upload; reused for the preview and for every column crop
    bytes_data = uploaded_file.getvalue()
    image = Image.open(io.BytesIO(bytes_data))
    # Full image width, used to divide the page into three columns
    width = image.width

    # Preview of what was uploaded
    st.image(image, caption='Uploaded Image.', use_column_width=True)

    # Processing only starts once the user presses the button
    if st.button('Extract Text'):
        # OCR each column in left-to-right order (1, 2, 3)
        column_texts = [
            extract_text_by_column(bytes_data, column, width)
            for column in (1, 2, 3)
        ]

        # Parse each column's text and concatenate the records in column order
        all_voter_info = []
        for column_text in column_texts:
            all_voter_info.extend(normalize_and_extract(column_text))

        voter_df = pd.DataFrame(all_voter_info)

        # Show the table in the app
        st.dataframe(voter_df)

        # Offer the same table as a CSV file
        st.download_button(
            label="Download data as CSV",
            data=voter_df.to_csv(index=False),
            file_name='voter_info.csv',
            mime='text/csv',
        )