ImranzamanML's picture
Upload app.py
832ca96 verified
raw
history blame
6.2 kB
import os
import numpy as np
import json
import shutil
import requests
import re as r
from urllib.request import urlopen
from datetime import datetime
import gradio as gr
import tensorflow as tf
import keras_ocr
import cv2
import csv
import pandas as pd
import huggingface_hub
from huggingface_hub import Repository, upload_file
import scipy.ndimage.interpolation as inter
import easyocr
from datasets import load_dataset, Image
from PIL import Image as PILImage
from paddleocr import PaddleOCR
import pytesseract
import torch
import spaces
# ---------------------------------------------------------------------------
# Global configuration
# ---------------------------------------------------------------------------

# Hugging Face access token, taken from the environment (None when unset).
HF_TOKEN = os.getenv("HF_TOKEN")

# Hub dataset that receives the logged images and metadata.
DATASET_NAME = "image_to_text_ocr"
DATASET_REPO_ID = "ImranzamanML/image_to_text_ocr"
DATASET_REPO_URL = "https://huggingface.co/ImranzamanML/image_to_text_ocr"

# CSV log file name and its relative location.
DATA_FILENAME = "ocr_data.csv"
DATA_FILE_PATH = os.path.join("ocr_data", DATA_FILENAME)

# Folder prefix used for files inside the Hub repository.
REPOSITORY_DIR = "data"

# Local scratch folder for per-request logs; created at import time.
LOCAL_DIR = "data_local"
os.makedirs(LOCAL_DIR, exist_ok=True)
"""
OCR using PaddleOCR
"""
@spaces.GPU
def paddle_ocr_processor(image):
    """Run PaddleOCR on ``image`` and return the recognized text.

    Args:
        image: Input passed unchanged to ``PaddleOCR.ocr``.

    Returns:
        str: Every recognized fragment prefixed with a single space, in the
        order PaddleOCR reported them. An empty string when nothing was
        detected.
    """
    ocr = PaddleOCR(use_gpu=True, lang='en', use_angle_cls=True)
    result = ocr.ocr(image)

    # When no text is found PaddleOCR yields ``[None]`` (or an empty result)
    # for the page; indexing/len() on that used to raise TypeError.
    detections = result[0] if result else None
    if not detections:
        return ''

    # detection[1][0] is the recognized string of each entry
    # (entries presumably look like [box, (text, score)] -- see PaddleOCR docs).
    return ''.join(' ' + detection[1][0] for detection in detections)
"""
OCR using Keras OCR
"""
@spaces.GPU
def keras_ocr_processor(image):
    """Run the keras-ocr pipeline on ``image`` and return the recognized text.

    Args:
        image: Input passed to ``keras_ocr.tools.read``.

    Returns:
        str: Each recognized word prefixed with a single space, in the order
        the pipeline returned them.
    """
    recognizer = keras_ocr.pipeline.Pipeline()
    batch = [keras_ocr.tools.read(image)]
    # Only one image is submitted, so only the first prediction list matters.
    words_and_boxes = recognizer.recognize(batch)[0]
    return ''.join(' ' + word for word, _box in words_and_boxes)
"""
OCR using EasyOCR
"""
def convert_to_grayscale(image):
    """Convert a 3-channel RGB image to single-channel grayscale.

    The only caller in this file passes the array coming from the Gradio
    image input, which Gradio delivers in RGB channel order. The previous
    ``COLOR_BGR2GRAY`` code applied the red and blue luminance weights to
    the wrong channels.

    Args:
        image: Color image array in RGB order.

    Returns:
        The grayscale image produced by ``cv2.cvtColor``.
    """
    return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
def apply_thresholding(src):
    """Threshold ``src`` with ``cv2.THRESH_TOZERO`` (threshold 127, max 255).

    Args:
        src: Image array passed to ``cv2.threshold``.

    Returns:
        The thresholded image, i.e. the second element of the
        ``cv2.threshold`` result. ``src`` itself is not reassigned here.
    """
    _retval, thresholded = cv2.threshold(src, 127, 255, cv2.THRESH_TOZERO)
    return thresholded
@spaces.GPU
def easy_ocr_processor(image):
    """Preprocess ``image`` and run EasyOCR (Thai + English) on it.

    The image is converted to grayscale, thresholded, and handed to EasyOCR
    as an in-memory array.

    Args:
        image: Color image array accepted by ``convert_to_grayscale``.

    Returns:
        str: The recognized paragraphs concatenated without a separator.
    """
    gray_image = convert_to_grayscale(image)
    # apply_thresholding returns a new array; the result was previously
    # discarded, so OCR silently ran on the un-thresholded image.
    processed_image = apply_thresholding(gray_image)

    reader = easyocr.Reader(['th', 'en'])
    # The array is passed directly instead of round-tripping through a fixed
    # 'processed_image.png' file that concurrent requests would overwrite.
    # paragraph=True keeps the existing output: the former value was the
    # string "False", which is truthy and therefore enabled paragraph mode.
    detected_text = reader.readtext(processed_image, paragraph=True, detail=0)
    return ''.join(detected_text)
"""
Utility Functions
"""
def save_json(data, filepath):
    """Write ``data`` to ``filepath`` as JSON, replacing any existing content.

    Args:
        data: JSON-serializable object.
        filepath: Destination path; opened in UTF-8 text mode.
    """
    with open(filepath, mode='w+', encoding="utf8") as out_file:
        json.dump(data, fp=out_file)
def get_ip_address():
    """Return the public IP address reported by checkip.dyndns.com.

    The request is made from this process, so the address is the one the
    service sees for the machine running the app.

    Returns:
        str: The dotted-quad address, or ``''`` when the lookup fails for any
        reason (network error, timeout, unexpected response body).
    """
    try:
        # A timeout keeps a stalled lookup from blocking the caller forever;
        # the context manager closes the HTTP response.
        with urlopen('http://checkip.dyndns.com/', timeout=10) as response:
            body = response.read().decode('utf-8', errors='replace')
        match = r.search(r'Address: (\d+\.\d+\.\d+\.\d+)', body)
        if match is None:
            raise ValueError("no IP address found in response")
        return match.group(1)
    except Exception as e:
        # Best-effort lookup: report the problem and fall back to ''.
        print("Error while getting IP address -->", e)
        return ''
def fetch_location(ip_addr):
    """Look up location details for ``ip_addr`` via the remote lookup service.

    Args:
        ip_addr: IP address string sent in the request body.

    Returns:
        The decoded JSON response, or ``{}`` when the request or decoding
        fails for any reason.
    """
    try:
        # NOTE(review): the token is hard-coded in source; consider moving it
        # to an environment variable.
        req_data = {"ip": ip_addr, "token": "pkml123"}
        url = "https://demos.pragnakalp.com/get-ip-location"
        headers = {'Content-Type': 'application/json'}
        # Without a timeout requests waits indefinitely on an unresponsive
        # server, which would stall the caller.
        response = requests.post(
            url,
            headers=headers,
            data=json.dumps(req_data),
            timeout=10,
        )
        return response.json()
    except Exception as e:
        # Best-effort lookup: report the problem and fall back to {}.
        print("Error while getting location -->", e)
        return {}
def log_ocr_data(method, text_output, input_image):
    """Persist one OCR request locally, upload it to the Hub, and notify.

    Steps:
      1. Look up the IP address and its location.
      2. Save the image and a metadata JSON file under
         ``LOCAL_DIR/<timestamp>/``.
      3. Upload both files to the dataset repo under
         ``REPOSITORY_DIR/<timestamp>/``.
      4. POST the same information to the notification endpoint.

    Args:
        method: Name of the OCR method that was used.
        text_output: Text produced by the OCR method.
        input_image: Image array; must be accepted by ``PIL.Image.fromarray``
            and provide ``tolist()``.

    Returns:
        str: A fixed success message.

    Raises:
        Exception: If the image cannot be written to disk. Errors from the
            uploads or the notification request propagate unchanged.
    """
    print("Logging OCR data...")
    ip_address = get_ip_address()
    location_info = fetch_location(ip_address)

    # NOTE: one-second resolution -- two calls within the same second share
    # the same directory and repo paths.
    timestamp = datetime.now().strftime('%Y-%m-%d %H-%M-%S')
    save_dir = os.path.join(LOCAL_DIR, timestamp)
    os.makedirs(save_dir, exist_ok=True)

    image_filename = os.path.join(save_dir, 'image.png')
    try:
        PILImage.fromarray(input_image).save(image_filename)
    except Exception as err:
        # Chain the original error so the real cause stays visible.
        raise Exception("Failed to save image as file") from err

    metadata_file_path = os.path.join(save_dir, 'metadata.jsonl')
    metadata = {
        'id': timestamp,
        'method': method,
        'file_name': 'image.png',
        'generated_text': text_output,
        'ip': ip_address,
        'location': location_info,
    }
    save_json(metadata, metadata_file_path)

    uploads = (
        (image_filename, 'image.png'),
        (metadata_file_path, 'metadata.jsonl'),
    )
    for local_path, file_name in uploads:
        upload_file(
            path_or_fileobj=local_path,
            # Repo paths always use '/', independent of the local OS separator.
            path_in_repo=f"{REPOSITORY_DIR}/{timestamp}/{file_name}",
            repo_id=DATASET_REPO_ID,
            repo_type='dataset',
            token=HF_TOKEN,
        )

    # The former ``repo.git_pull()`` call was removed: no ``repo`` object is
    # ever created in this file, so it raised NameError on every call and the
    # code below was never reached. ``upload_file`` needs no local clone.

    url = 'http://pragnakalpdev35.pythonanywhere.com/HF_space_image_to_text'
    payload = {
        'Method': method,
        'text_output': text_output,
        'img': input_image.tolist(),
        'ip_address': ip_address,
        'loc': location_info,
    }
    # Bounded wait so an unresponsive endpoint cannot stall the request.
    response = requests.post(url, json=payload, timeout=30)
    print("Mail status code:", response.status_code)
    return "***** Logs saved successfully! *****"
"""
OCR Generation
"""
def generate_ocr_text(method, image):
    """Run the selected OCR method on ``image`` and log the request.

    Args:
        method: One of ``'EasyOCR'``, ``'KerasOCR'`` or ``'PaddleOCR'``. Any
            other value yields an empty result.
        image: Image array from the UI, or ``None`` when nothing was uploaded.

    Returns:
        str: The recognized text.

    Raises:
        gr.Error: If no image was provided or the image contains only zeros.
    """
    # Without an upload the image is None; calling ``.any()`` on it raised
    # AttributeError instead of showing the intended message.
    if image is None or not image.any():
        raise gr.Error("Please upload an image!")

    processors = {
        'EasyOCR': easy_ocr_processor,
        'KerasOCR': keras_ocr_processor,
        'PaddleOCR': paddle_ocr_processor,
    }
    processor = processors.get(method)
    text_output = processor(image) if processor is not None else ''

    # Logging is best-effort: a failure must not hide the OCR result.
    try:
        log_ocr_data(method, text_output, image)
    except Exception as e:
        print("Error while logging OCR data -->", e)
    return text_output
"""
Create user interface for OCR demo
"""
# Custom styling. The '#method_input' and '#output_textbox' selectors match
# the elem_id values assigned to the components below.
CUSTOM_CSS = """
.gradio-container {background-color: #f5f5f5; font-family: Arial, sans-serif;}
#method_input {background-color: #FFC107; font-size: 18px; padding: 10px;}
#output_textbox {font-size: 16px; color: #333;}
"""

image_input = gr.Image(label="Upload Image")
# elem_id is required for the id-based CSS rules to apply; without it the
# '#method_input' / '#output_textbox' rules never matched any element.
method_input = gr.Radio(
    ["PaddleOCR", "EasyOCR", "KerasOCR"],
    value="PaddleOCR",
    label="Select OCR Method",
    elem_id="method_input",
)
output_textbox = gr.Textbox(label="Recognized Text", elem_id="output_textbox")

# Input order matches the generate_ocr_text(method, image) signature.
demo = gr.Interface(
    fn=generate_ocr_text,
    inputs=[method_input, image_input],
    outputs=output_textbox,
    title="Enhanced OCR Demo",
    description="Choose an OCR method and upload an image to extract text.",
    theme="huggingface",
    css=CUSTOM_CSS,
)
demo.launch()