# ocr/app.py
import cv2
import torch
from transformers import DetrImageProcessor, DetrForObjectDetection, TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
from datetime import datetime
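
# Dependency note (inferred from the imports above; versions are not pinned here):
#   pip install opencv-python torch transformers pillow timm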

# Ensure all required libraries are installed (timm is needed by DETR's ResNet backbone)
try:
    import timm  # noqa: F401
except ImportError:
    raise ImportError("The 'timm' library is required but not installed. Install it with 'pip install timm'.")

# Load the DETR model for object detection (used here to propose plate regions).
# Note: the off-the-shelf "facebook/detr-resnet-50" checkpoint is trained on COCO,
# which has no dedicated license-plate class, so detections are generic objects
# (typically cars/trucks); a plate-specific fine-tuned detector would improve results.
try:
    detr_processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
    detr_model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
    detr_model.eval()
except Exception as e:
    raise RuntimeError(f"Error initializing DETR model: {e}")

# Load the TrOCR model for OCR (license plate text recognition)
try:
    trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
    trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
    trocr_model.eval()
except Exception as e:
    raise RuntimeError(f"Error initializing TrOCR model: {e}")
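# Note: "microsoft/trocr-base-handwritten" targets handwritten text. For printed
# license plates, the "microsoft/trocr-base-printed" checkpoint may be a better fit;
# swapping the model name above is a suggestion, not something tested here.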


def detect_license_plate(frame):
    """
    Detect candidate license plate regions in a BGR video frame using DETR.

    Returns a list of (x_min, y_min, x_max, y_max) boxes in pixel coordinates.
    """
    # Convert the OpenCV BGR frame to a PIL RGB image
    pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    # Preprocess the image and run DETR without tracking gradients
    inputs = detr_processor(images=pil_image, return_tensors="pt")
    with torch.no_grad():
        outputs = detr_model(**inputs)

    # Post-process: convert DETR's normalized (cx, cy, w, h) predictions into
    # absolute (x_min, y_min, x_max, y_max) pixel boxes at a 0.9 confidence threshold
    target_sizes = torch.tensor([pil_image.size[::-1]])  # (height, width)
    results = detr_processor.post_process_object_detection(
        outputs, threshold=0.9, target_sizes=target_sizes
    )[0]

    detected_boxes = [box.detach().cpu().numpy() for box in results["boxes"]]
    return detected_boxes
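
# Quick standalone check of the detector (hedged sketch; "sample_frame.jpg" is a
# hypothetical image path used only for illustration):
#
#   frame = cv2.imread("sample_frame.jpg")
#   for x_min, y_min, x_max, y_max in detect_license_plate(frame):
#       print(f"Candidate region: ({x_min:.0f}, {y_min:.0f}) -> ({x_max:.0f}, {y_max:.0f})")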


def recognize_text(plate_image):
    """
    Recognize text from a cropped license plate image (BGR) using TrOCR.
    """
    # Convert the cropped BGR region to a PIL RGB image
    pil_image = Image.fromarray(cv2.cvtColor(plate_image, cv2.COLOR_BGR2RGB))

    # Preprocess the crop, generate token IDs, and decode them into a string
    pixel_values = trocr_processor(images=pil_image, return_tensors="pt").pixel_values
    with torch.no_grad():
        generated_ids = trocr_model.generate(pixel_values)
    text = trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return text.strip()


def process_video(video_path, frame_skip=5):
    """
    Process a video to detect license plates and log entry/exit times.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise RuntimeError(f"Could not open video source: {video_path}")

    vehicle_data = {}
    frame_count = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        if frame_count % frame_skip != 0:
            continue  # Skip frames to reduce processing time

        # Detect candidate plate regions
        detected_boxes = detect_license_plate(frame)

        for box in detected_boxes:
            x_min, y_min, x_max, y_max = map(int, box)

            # Clamp the box to the frame bounds and skip degenerate crops
            frame_height, frame_width = frame.shape[:2]
            x_min, y_min = max(0, x_min), max(0, y_min)
            x_max, y_max = min(frame_width, x_max), min(frame_height, y_max)
            if x_max <= x_min or y_max <= y_min:
                continue

            license_plate_image = frame[y_min:y_max, x_min:x_max]

            # Recognize text from the cropped region
            license_plate = recognize_text(license_plate_image)
            if license_plate:
                current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                if license_plate not in vehicle_data:
                    # First sighting: treat as the vehicle entering
                    vehicle_data[license_plate] = {'entry_time': current_time, 'exit_time': None}
                    print(f"Vehicle {license_plate} entered at {current_time}")
                else:
                    # Subsequent sightings: update the exit time
                    vehicle_data[license_plate]['exit_time'] = current_time

                # Draw the bounding box and recognized text on the frame
                cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)
                cv2.putText(frame, license_plate, (x_min, y_min - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        # Display the annotated frame (optional; remove for headless environments)
        cv2.imshow('Vehicle Detection', frame)

        # Break on 'q' key press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

    # Print the collected vehicle data
    print("\nVehicle Data:")
    for plate, times in vehicle_data.items():
        print(f"License Plate: {plate}, Entry Time: {times['entry_time']}, Exit Time: {times['exit_time']}")

    return vehicle_data


if __name__ == "__main__":
    # Replace 'road_video.mp4' with the path to your video file, or pass 0 to use a webcam
    process_video("road_video.mp4", frame_skip=5)
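
# Optional usage sketch (hedged; "vehicle_log.csv" is a hypothetical output path):
# since process_video returns the vehicle_data dict, the entry/exit log can be
# persisted to CSV roughly as follows.
#
#   import csv
#   data = process_video("road_video.mp4", frame_skip=5)
#   with open("vehicle_log.csv", "w", newline="") as f:
#       writer = csv.writer(f)
#       writer.writerow(["license_plate", "entry_time", "exit_time"])
#       for plate, times in data.items():
#           writer.writerow([plate, times["entry_time"], times["exit_time"]])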