# Spaces: Running | File size: 7,400 Bytes | Revisions: 695a221 3729693 695a221
import os
import logging
import cv2
import numpy as np
from pdf2image import convert_from_path
from pytesseract import Output, pytesseract
from scipy.ndimage import rotate
from surya.ocr import run_ocr
from surya.model.detection.model import load_model as load_det_model, load_processor as load_det_processor
from surya.model.recognition.model import load_model as load_rec_model
from surya.model.recognition.processor import load_processor as load_rec_processor
import imutils
import gradio as gr
import subprocess
import glob
from PIL import Image, ImageDraw
from pytesseract import Output
import pytesseract
# Function to correct image skew
def correct_skew(image, delta=0.1, limit=3):
    """Deskew a BGR image by a small rotation about its centre.

    The image is converted to grayscale and inverse-binarised with an
    adaptive Gaussian threshold. Candidate angles from ``-limit`` up to
    roughly ``limit`` in steps of ``delta`` are each scored with
    ``determine_score``; the first angle with the highest score is applied.

    Args:
        image: Image array accepted by ``cv2.cvtColor(..., COLOR_BGR2GRAY)``.
        delta: Step between candidate angles, in degrees.
        limit: Largest absolute angle to try, in degrees.

    Returns:
        The rotated image, same width and height as the input, with the
        uncovered border filled with white.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    binary = cv2.adaptiveThreshold(
        grayscale,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        41,
        15,
    )

    # Score every candidate angle; only the score half of the pair is needed.
    angles = np.arange(-limit, limit + delta, delta)
    scores = [determine_score(binary, candidate)[1] for candidate in angles]
    best_angle = angles[scores.index(max(scores))]

    height, width = image.shape[:2]
    rotation = cv2.getRotationMatrix2D((width // 2, height // 2), best_angle, 1.0)
    rotated = cv2.warpAffine(
        image,
        rotation,
        (width, height),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(255, 255, 255),
    )

    print(f"[INFO] Detected skew angle: {best_angle} degrees")
    return rotated
def determine_score(arr, angle):
    """Score how sharply the row sums of ``arr`` change after rotating it.

    ``arr`` is rotated by ``angle`` degrees without changing its shape,
    using nearest-neighbour interpolation (``order=0``). Each row is then
    summed, and the score is the sum of squared differences between
    consecutive row sums.

    Returns:
        A ``(histogram, score)`` pair: the per-row sums as floats and the
        float score derived from them.
    """
    rotated = rotate(arr, angle, reshape=False, order=0)
    row_sums = np.sum(rotated, axis=1, dtype=float)
    row_steps = np.diff(row_sums)
    return row_sums, np.sum(row_steps ** 2, dtype=float)
def correct_image_rotation(image):
    """Fix the coarse orientation of a page image, then deskew it.

    Tesseract's orientation/script detection (OSD) is run on a 4x upscaled
    copy of the image. Unless the detected script is one of the excluded
    ones or the detected orientation is 180, the image is rotated by the
    angle OSD suggests. The result is then passed through ``correct_skew``
    and resized to the expected output size.

    Args:
        image: A ``PIL.Image.Image`` (any mode; converted to RGB) or an
            image array in BGR channel order.

    Returns:
        A ``PIL.Image.Image`` in RGB. Its size is the input size, or the
        rotated size when a 90/270 degree rotation was applied.
    """
    if isinstance(image, Image.Image):
        original_size = image.size
        print('image original size is:', original_size)
        # Force 3-channel RGB first: RGBA/L/P inputs would otherwise make
        # the RGB->BGR conversion fail.
        image = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)
    else:
        # Array input: derive the target size from the array itself so the
        # final resize below always has a size to work with.
        array_h, array_w = image.shape[:2]
        original_size = (array_w, array_h)

    h, w = image.shape[:2]
    # cv2.resize returns a new array, so no defensive copy is required.
    upscaled = cv2.resize(image, (w * 4, h * 4))

    try:
        results = pytesseract.image_to_osd(
            upscaled,
            output_type=Output.DICT,
            config='--dpi 300 --psm 0 -c min_characters_to_try=5 -c tessedit_script_lang=Arabic'
        )
    except pytesseract.TesseractError as exc:
        # OSD can fail outright (for example on pages with too little
        # text); fall back to skew correction only instead of aborting.
        print(f"[WARN] Orientation detection failed, skipping major rotation: {exc}")
        results = None

    needs_rotation = (
        results is not None
        and results["script"] not in ['Bengali', 'Latin', 'Greek', 'Katakana']
        and results["orientation"] != 180
    )

    if needs_rotation:
        print("[INFO] Detected orientation: {}".format(results["orientation"]))
        print("[INFO] Rotate by {} degrees to correct".format(results["rotate"]))
        print("[INFO] Detected script: {}".format(results["script"]))
        rotated = imutils.rotate_bound(image, angle=results['rotate'])
        if results['rotate'] in [90, 270]:
            # A quarter turn swaps width and height, so the output size
            # follows the rotated image rather than the input.
            rotated_h, rotated_w = rotated.shape[:2]
            original_size = (rotated_w, rotated_h)
            print(f"Rotated dimensions: {rotated_w}x{rotated_h}")
            if (rotated_w, rotated_h) != (h, w):
                # cv2.resize takes (width, height); the expected rotated
                # shape is width == h and height == w.
                rotated = cv2.resize(rotated, (h, w))
    else:
        print("[INFO] Major orientation is correct, proceeding to fine-tune...")
        rotated = image

    final_rotated = correct_skew(rotated)
    rotated_pil = Image.fromarray(cv2.cvtColor(final_rotated, cv2.COLOR_BGR2RGB))
    print('resize the image to its original size: ', original_size)
    corrected_image = rotated_pil.resize(original_size, Image.Resampling.LANCZOS)
    return corrected_image
# Function to process PDF or image and detect text lines
def process_pdf(file_path):
    """Run ``surya_detect`` on ``file_path`` and return its output directory.

    The three result directories under ``/home`` are created if missing.
    ``surya_detect`` is invoked with the text-line results directory and
    ``file_path`` as its ``--images`` argument. Afterwards any file whose
    name contains ``column`` inside the per-input output directory is
    deleted.

    Args:
        file_path: Path handed to ``surya_detect --images``. The output
            sub-directory is named after its base name without extension.

    Returns:
        The path ``<detected_text_dir>/<name>``, or ``None`` when
        ``surya_detect`` could not be started or exited with an error.
    """
    detected_text_dir = "/home/Detected_Text_Line"
    detected_layout_dir = "/home/Detected_layout"
    ocr_dir = "/home/OCR"

    for directory in (detected_text_dir, detected_layout_dir, ocr_dir):
        os.makedirs(directory, exist_ok=True)

    # normpath drops a trailing separator, which would otherwise make
    # basename() return an empty string for directory inputs.
    pdf_name = os.path.splitext(os.path.basename(os.path.normpath(file_path)))[0]

    # Step 1: Run surya_detect
    try:
        subprocess.run(
            ["surya_detect", "--results_dir", detected_text_dir, "--images", file_path],
            check=True,
        )
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable surya_detect binary,
        # which subprocess.run raises before the command ever runs.
        print(f"[ERROR] surya_detect failed: {e}")
        return None
    print(f"[INFO] surya_detect completed for {file_path}")

    output_dir = os.path.join(detected_text_dir, pdf_name)

    # Step 2: Remove column files (if they exist)
    # The directory part is escaped so glob metacharacters in the input
    # name are matched literally.
    column_files = glob.glob(os.path.join(glob.escape(output_dir), "*column*"))
    if column_files:
        all_removed = True
        for column_file in column_files:
            try:
                os.remove(column_file)
            except OSError as e:
                all_removed = False
                print(f"[ERROR] Failed to remove column files: {e}")
        if all_removed:
            print(f"[INFO] Removed column files for {pdf_name}")
    else:
        print(f"[INFO] No column files found for {pdf_name}")

    # Return the path to the directory containing the output images
    return output_dir
# Function to handle the Gradio interface
def _placeholder_image(message, color):
    """Return a 400x200 RGB image filled with ``color`` showing ``message``."""
    placeholder = Image.new("RGB", (400, 200), color=color)
    ImageDraw.Draw(placeholder).text((10, 10), message, fill="white")
    return placeholder


def gradio_interface(file):
    """Correct an uploaded PDF or image and detect its text lines.

    Every page (PDF) or the single image is passed through
    ``correct_image_rotation``, saved as ``corrected_<i>.png`` under
    ``/home/Corrected_Images``, and that directory is handed to
    ``process_pdf``.

    Args:
        file: The uploaded file: either an object exposing the path as
            ``.name`` or a plain path string. ``None`` means no upload.

    Returns:
        A ``(corrected_images, detected_images)`` pair of lists of PIL
        images. The second list holds a single placeholder image when
        detection failed or produced no images, or when nothing was
        uploaded.
    """
    if file is None:
        return [], [_placeholder_image("No file uploaded.", "red")]

    # Accept both file-like uploads (with .name) and plain path strings.
    file_path = str(getattr(file, "name", file))

    # Step 1: Correct the skew of the input file
    if file_path.lower().endswith('.pdf'):
        corrected_images = [
            correct_image_rotation(page) for page in convert_from_path(file_path)
        ]
    else:
        # The context manager closes the upload once it has been converted.
        with Image.open(file_path) as image:
            corrected_images = [correct_image_rotation(image)]

    # Save corrected images to a folder
    corrected_dir = "/home/Corrected_Images"
    os.makedirs(corrected_dir, exist_ok=True)
    # The folder is shared between requests: drop pages left over from an
    # earlier, longer upload so they are not fed to detection again.
    for stale_path in glob.glob(os.path.join(corrected_dir, "corrected_*.png")):
        try:
            os.remove(stale_path)
        except OSError as e:
            print(f"[WARN] Could not remove stale file {stale_path}: {e}")
    for i, corrected_image in enumerate(corrected_images):
        corrected_image.save(os.path.join(corrected_dir, f"corrected_{i}.png"))

    # Step 2: Detect text lines in the corrected images
    detected_dir = process_pdf(corrected_dir)
    if detected_dir is None:
        return corrected_images, [
            _placeholder_image(
                "Error detecting text lines. Check the logs for details.", "red"
            )
        ]

    # Load the detected text line images. A missing output directory is
    # treated the same as an empty one.
    file_names = sorted(os.listdir(detected_dir)) if os.path.isdir(detected_dir) else []
    detected_images = []
    for image_file in file_names:
        if image_file.lower().endswith((".png", ".jpg", ".jpeg")):
            # Image.open is lazy; copy() loads the pixels so the file
            # handle can be closed immediately.
            with Image.open(os.path.join(detected_dir, image_file)) as detected:
                detected_images.append(detected.copy())

    if not detected_images:
        return corrected_images, [
            _placeholder_image("No detected text line images found.", "gray")
        ]

    return corrected_images, detected_images
# Gradio Interface
# One file input; two galleries as output: the corrected pages and the
# text-line detection images returned by gradio_interface.
iface = gr.Interface(
    fn=gradio_interface,
    inputs=gr.File(label="Upload PDF or Image"),
    outputs=[
        gr.Gallery(label="Corrected Images", columns=[2], height="auto"),
        gr.Gallery(label="Detected Text Lines", columns=[2], height="auto"),
    ],
    title="PDF/Image Skew Correction and Text Line Detection",
    description="Upload a PDF or image to correct skew and detect text lines.",
)

# Start the web app when the module is executed.
iface.launch()