"""Gradio demo that extracts text from an uploaded image.

The user picks one of three OCR engines (PaddleOCR, EasyOCR, Keras-OCR).
After recognition, the image and the recognised text are logged locally,
uploaded to a Hugging Face dataset repository, and posted to a notification
endpoint. Logging is best-effort: a logging failure never hides the OCR
result from the user.
"""

import os
import json
import shutil
import re as r
from urllib.request import urlopen
from datetime import datetime
import csv

import numpy as np
import requests
import gradio as gr
import tensorflow as tf
import keras_ocr
import cv2
import pandas as pd
import huggingface_hub
from huggingface_hub import Repository, upload_file
import scipy.ndimage.interpolation as inter
import easyocr
from datasets import load_dataset, Image
from PIL import Image as PILImage
from paddleocr import PaddleOCR
import pytesseract
import torch
import spaces

# Global Variables
HF_TOKEN = os.environ.get("HF_TOKEN")
DATASET_NAME = "image_to_text_ocr"
DATASET_REPO_URL = "https://huggingface.co/ImranzamanML/image_to_text_ocr"
DATA_FILENAME = "ocr_data.csv"
DATA_FILE_PATH = os.path.join("ocr_data", DATA_FILENAME)
DATASET_REPO_ID = "ImranzamanML/image_to_text_ocr"
REPOSITORY_DIR = "data"
LOCAL_DIR = 'data_local'

# Upper bound (seconds) for every outbound HTTP call made while logging, so a
# slow third-party service cannot hang a user request indefinitely.
HTTP_TIMEOUT_SECONDS = 30

# Extracts the dotted-quad address from the checkip.dyndns.com response body.
IP_ADDRESS_PATTERN = r.compile(r'Address: (\d+\.\d+\.\d+\.\d+)')

os.makedirs(LOCAL_DIR, exist_ok=True)


# ---------------------------------------------------------------------------
# OCR using PaddleOCR
# ---------------------------------------------------------------------------

@spaces.GPU
def paddle_ocr_processor(image):
    """Run PaddleOCR on *image* and return the recognised text.

    Each recognised line is prefixed with a single space, so a non-empty
    result starts with a space. Returns '' when nothing is detected.
    """
    ocr = PaddleOCR(use_gpu=True, lang='en', use_angle_cls=True)
    result = ocr.ocr(image)
    # Guard against an empty result: indexing/len() on a missing page entry
    # would otherwise raise (PaddleOCR is assumed to yield [None] when no
    # text is found -- TODO confirm against the installed version).
    if not result or not result[0]:
        return ''
    # Each entry is indexed as entry[1][0] to obtain the text string.
    return ''.join(f' {entry[1][0]}' for entry in result[0])


# ---------------------------------------------------------------------------
# OCR using Keras OCR
# ---------------------------------------------------------------------------

@spaces.GPU
def keras_ocr_processor(image):
    """Run the Keras-OCR pipeline on *image* and return the recognised text.

    Each recognised word is prefixed with a single space.
    """
    pipeline = keras_ocr.pipeline.Pipeline()
    images = [keras_ocr.tools.read(image)]
    predictions = pipeline.recognize(images)
    # Only one image is submitted, so only the first prediction list matters.
    return ''.join(f' {text}' for text, _box in predictions[0])


# ---------------------------------------------------------------------------
# OCR using EasyOCR
# ---------------------------------------------------------------------------

def convert_to_grayscale(image):
    """Return a single-channel grayscale copy of *image*.

    NOTE(review): the conversion assumes BGR channel order, while Gradio
    image inputs are presumably RGB -- confirm and switch to
    cv2.COLOR_RGB2GRAY if so.
    """
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)


def apply_thresholding(src):
    """Return *src* with every pixel at or below 127 set to zero."""
    return cv2.threshold(src, 127, 255, cv2.THRESH_TOZERO)[1]


@spaces.GPU
def easy_ocr_processor(image):
    """Run EasyOCR (Thai + English) on *image* and return the text.

    The image is converted to grayscale and thresholded before recognition.
    The recognised paragraphs are concatenated without a separator.
    """
    gray_image = convert_to_grayscale(image)
    # The thresholded image must be captured: cv2.threshold returns a new
    # array, so discarding the return value skips the preprocessing entirely.
    processed_image = apply_thresholding(gray_image)
    reader = easyocr.Reader(['th', 'en'])
    # The array is passed directly instead of going through a fixed
    # 'processed_image.png' file, which concurrent requests would overwrite.
    # paragraph=True keeps the effective behaviour of the previous truthy
    # string argument "False" while making it an explicit boolean.
    # NOTE(review): if per-box output was intended, change to paragraph=False.
    detected_text = reader.readtext(processed_image, paragraph=True, detail=0)
    return ''.join(detected_text)


# ---------------------------------------------------------------------------
# Utility Functions
# ---------------------------------------------------------------------------

def save_json(data, filepath):
    """Serialise *data* as JSON into *filepath* (UTF-8, overwriting)."""
    with open(filepath, 'w', encoding="utf8") as f:
        json.dump(data, f)


def get_ip_address():
    """Return the public IP address reported by checkip.dyndns.com.

    The lookup is made from the process running this code, so the address is
    that of the host machine. Returns '' when the lookup fails for any
    reason.
    """
    try:
        with urlopen('http://checkip.dyndns.com/',
                     timeout=HTTP_TIMEOUT_SECONDS) as response:
            body = response.read().decode('utf-8', errors='replace')
        match = IP_ADDRESS_PATTERN.search(body)
        if match is None:
            raise ValueError("no IP address found in response")
        return match.group(1)
    except Exception as e:
        # Best-effort lookup: logging must go on without an IP address.
        print("Error while getting IP address -->", e)
        return ''


def fetch_location(ip_addr):
    """Return the location lookup response for *ip_addr* as a dict.

    Returns {} when the request or the JSON decoding fails.
    """
    try:
        # NOTE(review): the service token is hard-coded in source; consider
        # moving it to an environment variable.
        req_data = {"ip": ip_addr, "token": "pkml123"}
        url = "https://demos.pragnakalp.com/get-ip-location"
        headers = {'Content-Type': 'application/json'}
        response = requests.post(
            url,
            headers=headers,
            data=json.dumps(req_data),
            timeout=HTTP_TIMEOUT_SECONDS,
        )
        return response.json()
    except Exception as e:
        # Best-effort lookup: logging must go on without location data.
        print("Error while getting location -->", e)
        return {}


def log_ocr_data(method, text_output, input_image):
    """Persist one OCR request locally, in the dataset repo, and by mail hook.

    Args:
        method: Name of the OCR engine that produced the text.
        text_output: The recognised text.
        input_image: The input image as an array accepted by
            ``PIL.Image.fromarray``.

    Returns:
        A fixed success message.

    Raises:
        Exception: If the image cannot be saved; upload and HTTP errors
            propagate unchanged.
    """
    print("Logging OCR data...")
    ip_address = get_ip_address()
    location_info = fetch_location(ip_address)

    # One directory per request, named after the (second-resolution) time.
    timestamp = datetime.now().strftime('%Y-%m-%d %H-%M-%S')
    save_dir = os.path.join(LOCAL_DIR, timestamp)
    os.makedirs(save_dir, exist_ok=True)

    image_filename = os.path.join(save_dir, 'image.png')
    try:
        PILImage.fromarray(input_image).save(image_filename)
    except Exception as err:
        # Chain the cause so the real reason is visible in the traceback.
        raise Exception("Failed to save image as file") from err

    metadata_file_path = os.path.join(save_dir, 'metadata.jsonl')
    metadata = {
        'id': timestamp,
        'method': method,
        'file_name': 'image.png',
        'generated_text': text_output,
        'ip': ip_address,
        'location': location_info,
    }
    save_json(metadata, metadata_file_path)

    # Repository paths always use '/' regardless of the host OS.
    uploads = (
        (image_filename, 'image.png'),
        (metadata_file_path, 'metadata.jsonl'),
    )
    for local_path, repo_name in uploads:
        upload_file(
            path_or_fileobj=local_path,
            path_in_repo='/'.join((REPOSITORY_DIR, timestamp, repo_name)),
            repo_id=DATASET_REPO_ID,
            repo_type='dataset',
            token=HF_TOKEN,
        )

    # A former `repo.git_pull()` call was removed here: no `repo` object is
    # defined anywhere in this file, so it raised NameError on every request
    # and aborted the remainder of this function. The uploads above do not
    # use a local clone.

    # NOTE(review): this posts the full image, the text and the IP/location
    # data to an external endpoint; it was unreachable before the NameError
    # fix, so confirm it is still wanted.
    url = 'http://pragnakalpdev35.pythonanywhere.com/HF_space_image_to_text'
    payload = {
        'Method': method,
        'text_output': text_output,
        'img': input_image.tolist(),
        'ip_address': ip_address,
        'loc': location_info,
    }
    response = requests.post(url, json=payload, timeout=HTTP_TIMEOUT_SECONDS)
    print("Mail status code:", response.status_code)

    return "***** Logs saved successfully! *****"


# ---------------------------------------------------------------------------
# OCR Generation
# ---------------------------------------------------------------------------

# Maps the radio-button label to the function implementing that engine.
OCR_PROCESSORS = {
    'EasyOCR': easy_ocr_processor,
    'KerasOCR': keras_ocr_processor,
    'PaddleOCR': paddle_ocr_processor,
}


def generate_ocr_text(method, image):
    """Recognise the text in *image* with the engine named *method*.

    Args:
        method: One of 'PaddleOCR', 'EasyOCR' or 'KerasOCR'. Any other value
            yields an empty string.
        image: The uploaded image as a NumPy array, or None when nothing was
            uploaded.

    Returns:
        The recognised text.

    Raises:
        gr.Error: If no image was uploaded or every pixel is zero.
    """
    # `image` is None when the user submits without uploading; checking it
    # first shows the friendly error instead of an AttributeError.
    if image is None or not image.any():
        raise gr.Error("Please upload an image!")

    processor = OCR_PROCESSORS.get(method)
    text_output = processor(image) if processor is not None else ''

    try:
        log_ocr_data(method, text_output, image)
    except Exception as e:
        # Logging is best-effort; the user still gets the OCR result.
        print(e)
    return text_output


# ---------------------------------------------------------------------------
# Create user interface for OCR demo
# ---------------------------------------------------------------------------

image_input = gr.Image(label="Upload Image")
# elem_id values are set so the #method_input / #output_textbox CSS rules
# below actually match these components.
method_input = gr.Radio(
    ["PaddleOCR", "EasyOCR", "KerasOCR"],
    value="PaddleOCR",
    label="Select OCR Method",
    elem_id="method_input",
)
output_textbox = gr.Textbox(label="Recognized Text", elem_id="output_textbox")

demo = gr.Interface(
    fn=generate_ocr_text,
    inputs=[method_input, image_input],
    outputs=output_textbox,
    title="Enhanced OCR Demo",
    description="Choose an OCR method and upload an image to extract text.",
    theme="huggingface",
    css="""
    .gradio-container {background-color: #f5f5f5; font-family: Arial, sans-serif;}
    #method_input {background-color: #FFC107; font-size: 18px; padding: 10px;}
    #output_textbox {font-size: 16px; color: #333;}
    """
)

demo.launch()