Spaces:

ImranzamanML
/

image_to_text_ocr_hf

Running

App Files Files Community

ImranzamanML committed on Aug 20, 2024

Commit

832ca96

verified ·

1 Parent(s): d5d03a7

Upload app.py

Browse files

Files changed (1) hide show

app.py +209 -0

app.py ADDED Viewed

	@@ -0,0 +1,209 @@

+import os
+import numpy as np
+import json
+import shutil
+import requests
+import re as r
+from urllib.request import urlopen
+from datetime import datetime
+import gradio as gr
+import tensorflow as tf
+import keras_ocr
+import cv2
+import csv
+import pandas as pd
+import huggingface_hub
+from huggingface_hub import Repository, upload_file
+import scipy.ndimage.interpolation as inter
+import easyocr
+from datasets import load_dataset, Image
+from PIL import Image as PILImage
+from paddleocr import PaddleOCR
+import pytesseract
+import torch
+import spaces
+# Global configuration: Hub credentials, dataset identifiers and storage paths.
+HF_TOKEN = os.environ.get("HF_TOKEN")  # Hub token read from the environment; None when unset
+DATASET_NAME = "image_to_text_ocr"
+DATASET_REPO_URL = "https://huggingface.co/ImranzamanML/image_to_text_ocr"
+DATA_FILENAME = "ocr_data.csv"
+DATA_FILE_PATH = os.path.join("ocr_data", DATA_FILENAME)
+DATASET_REPO_ID = "ImranzamanML/image_to_text_ocr"  # dataset repo that log_ocr_data uploads to
+REPOSITORY_DIR = "data"  # top-level folder for uploaded files inside the dataset repo
+LOCAL_DIR = 'data_local'  # local staging folder for per-request image/metadata files
+os.makedirs(LOCAL_DIR, exist_ok=True)  # created at import time so later writes can rely on it
+"""
+OCR using PaddleOCR
+"""
+@spaces.GPU
+def paddle_ocr_processor(image):
+    final_text = ''
+    ocr = PaddleOCR(use_gpu=True, lang='en', use_angle_cls=True)
+    result = ocr.ocr(image)
+    for i in range(len(result[0])):
+        text = result[0][i][1][0]
+        final_text += ' ' + text
+    return final_text
+"""
+OCR using Keras OCR
+"""
+@spaces.GPU
+def keras_ocr_processor(image):
+    output_text = ''
+    pipeline = keras_ocr.pipeline.Pipeline()
+    images = [keras_ocr.tools.read(image)]
+    predictions = pipeline.recognize(images)
+    first_prediction = predictions[0]
+    for text, box in first_prediction:
+        output_text += ' ' + text
+    return output_text
+"""
+OCR using EasyOCR
+"""
+def convert_to_grayscale(image):
+    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+def apply_thresholding(src):
+    return cv2.threshold(src, 127, 255, cv2.THRESH_TOZERO)[1]
+@spaces.GPU
+def easy_ocr_processor(image):
+    gray_image = convert_to_grayscale(image)
+    apply_thresholding(gray_image)
+    cv2.imwrite('processed_image.png', gray_image)
+    reader = easyocr.Reader(['th', 'en'])
+    detected_text = reader.readtext('processed_image.png', paragraph="False", detail=0)
+    detected_text = ''.join(detected_text)
+    return detected_text
+"""
+Utility Functions
+"""
+def save_json(data, filepath):
+    with open(filepath, 'w+', encoding="utf8") as f:
+        json.dump(data, f)
+def get_ip_address():
+    try:
+        response = str(urlopen('http://checkip.dyndns.com/').read())
+        return r.compile(r'Address: (\d+\.\d+\.\d+\.\d+)').search(response).group(1)
+    except Exception as e:
+        print("Error while getting IP address -->", e)
+        return ''
+def fetch_location(ip_addr):
+    try:
+        req_data = {"ip": ip_addr, "token": "pkml123"}
+        url = "https://demos.pragnakalp.com/get-ip-location"
+        headers = {'Content-Type': 'application/json'}
+        response = requests.post(url, headers=headers, data=json.dumps(req_data)).json()
+        return response
+    except Exception as e:
+        print("Error while getting location -->", e)
+        return {}
+def log_ocr_data(method, text_output, input_image):
+    print("Logging OCR data...")
+    ip_address = get_ip_address()
+    location_info = fetch_location(ip_address)
+    timestamp = datetime.now().strftime('%Y-%m-%d %H-%M-%S')
+    save_dir = os.path.join(LOCAL_DIR, timestamp)
+    os.makedirs(save_dir, exist_ok=True)
+    image_filename = os.path.join(save_dir, 'image.png')
+    try:
+        PILImage.fromarray(input_image).save(image_filename)
+    except Exception:
+        raise Exception(f"Failed to save image as file")
+    metadata_file_path = os.path.join(save_dir, 'metadata.jsonl')
+    metadata = {
+        'id': timestamp,
+        'method': method,
+        'file_name': 'image.png',
+        'generated_text': text_output,
+        'ip': ip_address,
+        'location': location_info
+    }
+    save_json(metadata, metadata_file_path)
+    repo_image_path = os.path.join(REPOSITORY_DIR, os.path.join(timestamp, 'image.png'))
+    _ = upload_file(
+        path_or_fileobj=image_filename,
+        path_in_repo=repo_image_path,
+        repo_id=DATASET_REPO_ID,
+        repo_type='dataset',
+        token=HF_TOKEN
+    )
+    repo_json_path = os.path.join(REPOSITORY_DIR, os.path.join(timestamp, 'metadata.jsonl'))
+    _ = upload_file(
+        path_or_fileobj=metadata_file_path,
+        path_in_repo=repo_json_path,
+        repo_id=DATASET_REPO_ID,
+        repo_type='dataset',
+        token=HF_TOKEN
+    )
+    repo.git_pull()
+    url = 'http://pragnakalpdev35.pythonanywhere.com/HF_space_image_to_text'
+    payload = {
+        'Method': method,
+        'text_output': text_output,
+        'img': input_image.tolist(),
+        'ip_address': ip_address,
+        'loc': location_info
+    }
+    response = requests.post(url, json=payload)
+    print("Mail status code:", response.status_code)
+    return "***** Logs saved successfully! *****"
+"""
+OCR Generation
+"""
+def generate_ocr_text(method, image):
+    text_output = ''
+    if image.any():
+        if method == 'EasyOCR':
+            text_output = easy_ocr_processor(image)
+        elif method == 'KerasOCR':
+            text_output = keras_ocr_processor(image)
+        elif method == 'PaddleOCR':
+            text_output = paddle_ocr_processor(image)
+        try:
+            log_ocr_data(method, text_output, image)
+        except Exception as e:
+            print(e)
+        return text_output
+    else:
+        raise gr.Error("Please upload an image!")
+"""
+Create user interface for OCR demo
+"""
+image_input = gr.Image(label="Upload Image")
+method_input = gr.Radio(["PaddleOCR", "EasyOCR", "KerasOCR"], value="PaddleOCR", label="Select OCR Method")
+output_textbox = gr.Textbox(label="Recognized Text")
+demo = gr.Interface(
+    fn=generate_ocr_text,
+    inputs=[method_input, image_input],
+    outputs=output_textbox,
+    title="Enhanced OCR Demo",
+    description="Choose an OCR method and upload an image to extract text.",
+    theme="huggingface",
+    css="""
+    .gradio-container {background-color: #f5f5f5; font-family: Arial, sans-serif;}
+    #method_input {background-color: #FFC107; font-size: 18px; padding: 10px;}
+    #output_textbox {font-size: 16px; color: #333;}
+    """
+)
+demo.launch()