Spaces:

nit454
/

paddle_ocr_testing

Build error

paddle_ocr_testing / app.py

Update app.py

8df9245 verified about 2 months ago

1.38 kB

	import numpy as np
	import random
	from paddleocr import PaddleOCR
	from difflib import SequenceMatcher

	# Module-level OCR engine, built once at import time and reused by every
	# call below: English model, angle classifier enabled, GPU disabled.
	ocr = PaddleOCR(
	    use_angle_cls=True,
	    lang='en',
	    use_gpu=False,
	)

	def calculate_similarity(text1: str, text2: str) -> float:
	    """Return a case-insensitive similarity ratio between two strings.

	    Both inputs are case-folded and stripped of leading/trailing whitespace
	    before being compared with ``difflib.SequenceMatcher``.

	    Args:
	        text1: First string to compare.
	        text2: Second string to compare.

	    Returns:
	        A float in ``[0.0, 1.0]``; ``1.0`` means the normalized strings are
	        identical (two empty strings also yield ``1.0``).
	    """
	    # casefold() rather than lower(): it also equates case variants that
	    # lower() leaves distinct (e.g. "ß" vs "SS").
	    normalized1 = text1.casefold().strip()
	    normalized2 = text2.casefold().strip()
	    return SequenceMatcher(None, normalized1, normalized2).ratio()

	def paddleocr_with_accuracy(image_path, correct_text):
	    """Run OCR on an image and print how closely it matches a reference text.

	    The recognized lines are joined with newlines, compared against
	    ``correct_text`` via ``calculate_similarity``, and the detected text,
	    accuracy and pipeline score are printed. Nothing is returned.

	    Args:
	        image_path: Image input handed unchanged to ``ocr.ocr``.
	        correct_text: Ground-truth text the OCR output is scored against.

	    Any exception raised along the way is caught and reported on stdout as
	    ``PaddleOCR Error: ...`` rather than propagated (best-effort script).
	    """
	    try:
	        # Perform OCR on the image file path
	        results = ocr.ocr(image_path, cls=True)

	        # Collect the recognized text regardless of which result layout
	        # the installed PaddleOCR release produces.
	        detected_text = "\n".join(_extract_text_lines(results))

	        # Accuracy score is the similarity ratio against the ground truth
	        accuracy = calculate_similarity(detected_text, correct_text)

	        # Simulate pipeline integration score (here same as accuracy)
	        pipeline_score = accuracy

	        print("OCR Detected Text:\n", detected_text)
	        print(f"\nAccuracy: {accuracy:.2%}")
	        print(f"Pipeline Integration Score: {pipeline_score:.2%}")

	    except Exception as e:
	        print(f"PaddleOCR Error: {e}")


	def _is_detection(item):
	    """Return True if *item* looks like one ``[box, (text, score)]`` entry."""
	    return (
	        isinstance(item, (list, tuple))
	        and len(item) == 2
	        and isinstance(item[1], (list, tuple))
	        and len(item[1]) == 2
	        and isinstance(item[1][0], str)
	    )


	def _extract_text_lines(results):
	    """Return the recognized strings from a PaddleOCR ``ocr()`` result.

	    Handles both layouts: a flat list of ``[box, (text, score)]`` entries
	    (older releases) and a list holding one such list per input image
	    (newer releases), where an image with no detected text is ``None``.
	    """
	    text_lines = []
	    for item in results or []:
	        if item is None:
	            # No text was found for this image.
	            continue
	        if _is_detection(item):
	            text_lines.append(item[1][0])
	        else:
	            # Per-image list of detections.
	            text_lines.extend(
	                detection[1][0] for detection in item if _is_detection(detection)
	            )
	    return text_lines

	# Example usage: score a single image against its expected text when the
	# file is executed as a script (nothing runs on import).
	if __name__ == "__main__":
	    image_file = "your_image.jpg"  # replace with your image path
	    ground_truth_text = """Enter the exact expected text from the image here."""

	    paddleocr_with_accuracy(image_file, ground_truth_text)