Spaces:

Insightly2
/

voterlist_conversion

Sleeping

App Files Files Community

voterlist_conversion / app.py

PriyankaSatish

Rename seg_final.py to app.py

a516b24 verified 3 months ago

raw

history blame contribute delete

No virus

4.32 kB

	from google.cloud import vision
	import streamlit as st
	from google.oauth2 import service_account
	import os
	import io
	import re
	import pandas as pd
	from PIL import Image

	# Resolve the service-account key path. A GOOGLE_APPLICATION_CREDENTIALS value
	# that is already set in the environment wins; otherwise fall back to the
	# key-file path below (relative, so it is resolved against the working
	# directory the app is started from).
	service_account_path = (
	    os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
	    or r'gcv-new-project-dd6ed833cc91.json'
	)
	# Export the resolved path so the client created below can discover it.
	os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = service_account_path

	# Module-level Vision client used by extract_text_by_column.
	client = vision.ImageAnnotatorClient()

	def extract_text_by_column(image_bytes, column, width):
	    """
	    Run text detection on one vertical third of an image.

	    :param image_bytes: Bytes of the image file
	    :param column: Column to extract text from (1 for left, 2 for middle, 3 for right)
	    :param width: The width of the image, used to compute the column bounds
	    :return: Text detected in the column, or '' when no text was found
	    :raises RuntimeError: If the text-detection response carries an error
	    """
	    # Horizontal bounds of the requested third. Adjacent columns share the
	    # same boundary expression, so they tile the width without gaps.
	    left = (column - 1) * width // 3
	    right = column * width // 3

	    # Crop and re-encode while the source image is still open; the context
	    # manager releases it as soon as the PNG bytes have been produced.
	    png_buffer = io.BytesIO()
	    with Image.open(io.BytesIO(image_bytes)) as source_image:
	        column_img = source_image.crop((left, 0, right, source_image.height))
	        column_img.save(png_buffer, format='PNG')

	    # Perform text detection on the cropped column only.
	    vision_image = vision.Image(content=png_buffer.getvalue())
	    response = client.text_detection(image=vision_image)

	    # A failed request must not be mistaken for "no text in this column".
	    if response.error.message:
	        raise RuntimeError(
	            f"Vision API text detection failed: {response.error.message}"
	        )

	    # The first annotation holds the full detected text.
	    texts = response.text_annotations
	    return texts[0].description if texts else ''


	def normalize_and_extract(text):
	    """
	    Parse voter records out of a block of recognised text.

	    The words "Photo" and "Available" are stripped first, then the text is
	    split into entries on blank lines (double newlines). Each entry is
	    searched for the labelled fields below; only the fields that are found
	    are included in that entry's record.

	    :param text: Raw text containing "Label : value" lines
	    :return: List of dicts with any of the keys 'Name', 'Relation Name',
	        'House Number', 'Age', 'Gender' (all values are stripped strings);
	        entries in which no field matched are omitted
	    """
	    # Every pattern exposes the wanted value as group 1. Alternations use a
	    # real `|`; an escaped `\|` would only match a literal pipe character.
	    patterns = {
	        'Name': re.compile(r"Name\s:\s([^\n-]+)"),
	        'Relation Name': re.compile(
	            r"(?:Husband|Father|Mother|Other)\sName\s:\s*([^\n-]+)"
	        ),
	        'House Number': re.compile(r"House Number\s:\s([^\n]+)"),
	        'Age': re.compile(r"Age\s:\s(\d+)"),
	        'Gender': re.compile(r"Gender\s:\s(Female|Male)"),
	    }

	    # Normalize text to remove extraneous words.
	    normalized_text = re.sub(r"Photo|Available", "", text)

	    voter_info_list = []
	    for entry in normalized_text.split('\n\n'):  # Entries are separated by blank lines
	        voter_info = {}
	        for key, pattern in patterns.items():
	            match = pattern.search(entry)
	            if match:
	                voter_info[key] = match.group(1).strip()
	        if voter_info:  # Only add non-empty records
	            voter_info_list.append(voter_info)

	    return voter_info_list

	st.title('Voter Information Extraction')

	uploaded_file = st.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])
	if uploaded_file is not None:
	    # Keep the raw bytes: each column extraction re-opens the image from them.
	    bytes_data = uploaded_file.getvalue()
	    image = Image.open(io.BytesIO(bytes_data))
	    width = image.width  # Full image width, split into thirds downstream

	    # Show the upload back to the user.
	    st.image(image, caption='Uploaded Image.', use_column_width=True)

	    # Processing starts only once the user presses the button.
	    if st.button('Extract Text'):
	        # Run text extraction on the left, middle and right thirds, in that order.
	        column_texts = [
	            extract_text_by_column(bytes_data, column, width)
	            for column in (1, 2, 3)
	        ]

	        # Parse each column's text and gather the records, left to right.
	        all_voter_info = []
	        for column_text in column_texts:
	            all_voter_info.extend(normalize_and_extract(column_text))
	        voter_df = pd.DataFrame(all_voter_info)

	        # Render the combined table in the app.
	        st.dataframe(voter_df)

	        # Offer the same table as a CSV download.
	        csv_payload = voter_df.to_csv(index=False)
	        st.download_button(
	            label="Download data as CSV",
	            data=csv_payload,
	            file_name='voter_info.csv',
	            mime='text/csv',
	        )