Spaces:

ImranzamanML
/

image_to_text_ocr_hf

Sleeping

App Files Files Community

image_to_text_ocr_hf / app.py

ImranzamanML

Upload app.py

832ca96 verified 6 months ago

raw

history blame

6.2 kB

	import os
	import numpy as np
	import json
	import shutil
	import requests
	import re as r
	from urllib.request import urlopen
	from datetime import datetime
	import gradio as gr
	import tensorflow as tf
	import keras_ocr
	import cv2
	import csv
	import pandas as pd
	import huggingface_hub
	from huggingface_hub import Repository, upload_file
	import scipy.ndimage.interpolation as inter
	import easyocr
	from datasets import load_dataset, Image
	from PIL import Image as PILImage
	from paddleocr import PaddleOCR
	import pytesseract
	import torch
	import spaces

	# Global configuration shared by the logging helpers below.
	HF_TOKEN = os.environ.get('HF_TOKEN')  # Hub token read from the environment; None when unset
	DATASET_NAME = 'image_to_text_ocr'
	DATASET_REPO_ID = 'ImranzamanML/image_to_text_ocr'
	DATASET_REPO_URL = 'https://huggingface.co/' + DATASET_REPO_ID
	DATA_FILENAME = 'ocr_data.csv'
	DATA_FILE_PATH = os.path.join('ocr_data', DATA_FILENAME)
	REPOSITORY_DIR = 'data'  # top-level folder used for uploads inside the dataset repo
	LOCAL_DIR = 'data_local'  # local folder where each request's image/metadata is written

	# Create the local log directory at import time so later writes can assume it exists.
	os.makedirs(LOCAL_DIR, exist_ok=True)

	"""
	OCR using PaddleOCR
	"""
	@spaces.GPU
	def paddle_ocr_processor(image):
	    """Run PaddleOCR (English, with angle classification) on ``image``.

	    Args:
	        image: Input handed unchanged to ``PaddleOCR.ocr``.

	    Returns:
	        str: Every recognized text fragment prefixed with a single space and
	        concatenated in detection order (a non-empty result therefore starts
	        with a space). Returns ``''`` when nothing was recognized.
	    """
	    ocr = PaddleOCR(use_gpu=True, lang='en', use_angle_cls=True)
	    result = ocr.ocr(image)

	    # result[0] carries the detections for the first (only) image. It can be
	    # None or empty when no text is found, which previously crashed len().
	    detections = result[0] if result else None
	    if not detections:
	        return ''

	    # Each detection is indexed as [box, (text, score)]; only the text is kept.
	    return ''.join(' ' + detection[1][0] for detection in detections)

	"""
	OCR using Keras OCR
	"""
	@spaces.GPU
	def keras_ocr_processor(image):
	    """Run the default keras-ocr pipeline on ``image``.

	    Args:
	        image: Input handed to ``keras_ocr.tools.read``.

	    Returns:
	        str: Every recognized word prefixed with a single space and
	        concatenated in the order the pipeline reports them.
	    """
	    pipeline = keras_ocr.pipeline.Pipeline()
	    batch = [keras_ocr.tools.read(image)]
	    # recognize() yields one list of (text, box) pairs per input image.
	    words = pipeline.recognize(batch)[0]
	    return ''.join(' ' + word for word, _box in words)

	"""
	OCR using EasyOCR
	"""
	def convert_to_grayscale(image):
	    """Return a single-channel grayscale copy of ``image``.

	    The conversion uses ``cv2.COLOR_BGR2GRAY``.
	    NOTE(review): the only caller passes the array Gradio delivers, which is
	    presumably RGB -- confirm whether ``COLOR_RGB2GRAY`` was intended.
	    """
	    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	    return grayscale

	def apply_thresholding(src):
	    """Apply OpenCV's ``THRESH_TOZERO`` threshold at level 127 to ``src``.

	    Returns:
	        The thresholded image, i.e. the second element of the
	        ``cv2.threshold`` result; ``src`` itself is not returned.
	    """
	    _level, thresholded = cv2.threshold(src, 127, 255, cv2.THRESH_TOZERO)
	    return thresholded

	@spaces.GPU
	def easy_ocr_processor(image):
	    """Run EasyOCR (Thai + English) on a grayscale, thresholded copy of ``image``.

	    Args:
	        image: Colour image array accepted by ``convert_to_grayscale``.

	    Returns:
	        str: The recognized text blocks concatenated without a separator.
	    """
	    gray_image = convert_to_grayscale(image)
	    # apply_thresholding returns a new array; the result used to be discarded,
	    # so the thresholding step never reached the OCR engine.
	    processed_image = apply_thresholding(gray_image)

	    reader = easyocr.Reader(['th', 'en'])
	    # The array is passed directly instead of via a fixed 'processed_image.png'
	    # in the working directory, which concurrent requests could overwrite.
	    # paragraph=True keeps the effective behaviour: the former value was the
	    # non-empty string "False", which is truthy.
	    detected_text = reader.readtext(processed_image, paragraph=True, detail=0)
	    return ''.join(detected_text)

	"""
	Utility Functions
	"""
	def save_json(data, filepath):
	    """Write ``data`` to ``filepath`` as a single JSON document (UTF-8).

	    Any existing file content is replaced.
	    """
	    with open(filepath, 'w+', encoding="utf8") as handle:
	        handle.write(json.dumps(data))

	def get_ip_address():
	    """Look up this process's public IPv4 address via checkip.dyndns.com.

	    Returns:
	        str: The dotted-quad address found in the response, or ``''`` when the
	        request fails or the response contains no address. Errors are printed,
	        never raised.
	    """
	    try:
	        # A timeout keeps a slow lookup service from stalling the OCR request,
	        # and the context manager closes the HTTP response deterministically.
	        with urlopen('http://checkip.dyndns.com/', timeout=10) as response:
	            body = response.read().decode('utf-8', errors='replace')
	        match = r.search(r'Address: (\d+\.\d+\.\d+\.\d+)', body)
	        if match is None:
	            raise ValueError("no IPv4 address found in response")
	        return match.group(1)
	    except Exception as e:
	        # Best-effort lookup: logging must not break because of it.
	        print("Error while getting IP address -->", e)
	        return ''

	def fetch_location(ip_addr):
	    """Query the remote IP-location service for ``ip_addr``.

	    Args:
	        ip_addr: IP address string; an empty/None value skips the lookup.

	    Returns:
	        The decoded JSON response of the service, or ``{}`` when no address
	        was given or the request/decoding fails. Errors are printed, never
	        raised.
	    """
	    if not ip_addr:
	        # get_ip_address() returns '' on failure; there is nothing to look up.
	        return {}
	    try:
	        # NOTE(review): the service token is hard-coded here; consider moving
	        # it to configuration.
	        req_data = {"ip": ip_addr, "token": "pkml123"}
	        url = "https://demos.pragnakalp.com/get-ip-location"
	        headers = {'Content-Type': 'application/json'}
	        # Timeout so an unresponsive service cannot block the request forever.
	        response = requests.post(
	            url, headers=headers, data=json.dumps(req_data), timeout=10
	        )
	        return response.json()
	    except Exception as e:
	        # Best-effort lookup: fall back to an empty location.
	        print("Error while getting location -->", e)
	        return {}

	def log_ocr_data(method, text_output, input_image):
	    """Record one OCR request locally, upload it to the Hub dataset and notify by HTTP.

	    Steps: save the image and a metadata file under ``LOCAL_DIR/<timestamp>/``,
	    upload both files to ``DATASET_REPO_ID`` under ``REPOSITORY_DIR/<timestamp>/``,
	    then POST the same information to the notification endpoint.

	    Args:
	        method: Name of the OCR method that produced the text.
	        text_output: Text returned by the OCR method.
	        input_image: Image array; must be accepted by ``PIL.Image.fromarray``
	            and provide ``tolist()``.

	    Returns:
	        str: A fixed success message.

	    Raises:
	        Exception: If the image cannot be saved (original error chained), or
	            whatever the upload / HTTP calls raise.
	    """
	    print("Logging OCR data...")
	    ip_address = get_ip_address()
	    location_info = fetch_location(ip_address)
	    timestamp = datetime.now().strftime('%Y-%m-%d %H-%M-%S')
	    save_dir = os.path.join(LOCAL_DIR, timestamp)
	    os.makedirs(save_dir, exist_ok=True)

	    image_filename = os.path.join(save_dir, 'image.png')
	    try:
	        PILImage.fromarray(input_image).save(image_filename)
	    except Exception as err:
	        # Chain the cause so the real failure stays visible in the traceback.
	        raise Exception("Failed to save image as file") from err

	    metadata_file_path = os.path.join(save_dir, 'metadata.jsonl')
	    metadata = {
	        'id': timestamp,
	        'method': method,
	        'file_name': 'image.png',
	        'generated_text': text_output,
	        'ip': ip_address,
	        'location': location_info,
	    }
	    save_json(metadata, metadata_file_path)

	    # Repo paths are built with '/' explicitly so they do not depend on the
	    # local OS path separator.
	    uploads = (
	        (image_filename, 'image.png'),
	        (metadata_file_path, 'metadata.jsonl'),
	    )
	    for local_path, name in uploads:
	        upload_file(
	            path_or_fileobj=local_path,
	            path_in_repo=f"{REPOSITORY_DIR}/{timestamp}/{name}",
	            repo_id=DATASET_REPO_ID,
	            repo_type='dataset',
	            token=HF_TOKEN,
	        )

	    # The former `repo.git_pull()` call was removed: no `repo` object is
	    # defined in this file, so it raised NameError on every request and the
	    # code below was never reached. upload_file() already pushes to the Hub.

	    # NOTE(review): this sends the full image and IP/location data to an
	    # external host over plain HTTP -- confirm this is intended.
	    url = 'http://pragnakalpdev35.pythonanywhere.com/HF_space_image_to_text'
	    payload = {
	        'Method': method,
	        'text_output': text_output,
	        'img': input_image.tolist(),
	        'ip_address': ip_address,
	        'loc': location_info,
	    }
	    # Timeout so a stalled notification endpoint cannot hang the request.
	    response = requests.post(url, json=payload, timeout=30)
	    print("Mail status code:", response.status_code)

	    return "*** Logs saved successfully! ***"

	"""
	OCR Generation
	"""
	def generate_ocr_text(method, image):
	    """Run the selected OCR method on ``image`` and log the request.

	    Args:
	        method: One of ``'EasyOCR'``, ``'KerasOCR'`` or ``'PaddleOCR'``; any
	            other value yields an empty result.
	        image: Image array from the UI, or ``None`` when nothing was uploaded.

	    Returns:
	        str: The recognized text (``''`` for an unknown method).

	    Raises:
	        gr.Error: If no image was supplied or the image contains only zeros.
	    """
	    # Check for None before calling .any(): a missing upload used to fail with
	    # AttributeError instead of showing the intended message.
	    if image is None or not image.any():
	        raise gr.Error("Please upload an image!")

	    text_output = ''
	    if method == 'EasyOCR':
	        text_output = easy_ocr_processor(image)
	    elif method == 'KerasOCR':
	        text_output = keras_ocr_processor(image)
	    elif method == 'PaddleOCR':
	        text_output = paddle_ocr_processor(image)

	    # Logging is best-effort: a failure is printed and must not hide the
	    # OCR result from the user.
	    try:
	        log_ocr_data(method, text_output, image)
	    except Exception as e:
	        print(e)
	    return text_output

	"""
	Create user interface for OCR demo
	"""
	# Input/output components. The elem_id values give the radio group and the
	# result box the DOM ids that the custom CSS below targets (#method_input,
	# #output_textbox); without them those two rules never matched anything.
	image_input = gr.Image(label="Upload Image")
	method_input = gr.Radio(
	    ["PaddleOCR", "EasyOCR", "KerasOCR"],
	    value="PaddleOCR",
	    label="Select OCR Method",
	    elem_id="method_input",
	)
	output_textbox = gr.Textbox(label="Recognized Text", elem_id="output_textbox")

	# Input order must match generate_ocr_text(method, image).
	demo = gr.Interface(
	    fn=generate_ocr_text,
	    inputs=[method_input, image_input],
	    outputs=output_textbox,
	    title="Enhanced OCR Demo",
	    description="Choose an OCR method and upload an image to extract text.",
	    theme="huggingface",
	    css="""
	.gradio-container {background-color: #f5f5f5; font-family: Arial, sans-serif;}
	#method_input {background-color: #FFC107; font-size: 18px; padding: 10px;}
	#output_textbox {font-size: 16px; color: #333;}
	"""
	)

	demo.launch()