Spaces:
Runtime error
Runtime error
import glob | |
import json | |
import os | |
import xml.etree.ElementTree as ET | |
import cv2 | |
# from sklearn.externals import joblib | |
import joblib | |
import numpy as np | |
import pandas as pd | |
# from .variables import old_ocr_req_cols | |
# from .skew_correction import PageSkewWraper | |
# NOTE(review): 1.294117647 equals 11 / 8.5; judging by the name this is
# presumably a height/width aspect ratio. It is not referenced in the visible
# part of this file -- confirm against its users before changing it.
const_HW = 1.294117647
# NOTE(review): presumably a target width in pixels paired with const_HW;
# also not referenced in the visible part of this file -- TODO confirm.
const_W = 600
# https://www.forbes.com/sites/forbestechcouncil/2020/06/02/leveraging-technologies-to-align-realograms-and-planograms-for-grocery/?sh=506b8b78e86c | |
# https://stackoverflow.com/questions/39403183/python-opencv-sorting-contours | |
# http://devdoc.net/linux/OpenCV-3.2.0/da/d0c/tutorial_bounding_rects_circles.html | |
# https://stackoverflow.com/questions/10297713/find-contour-of-the-set-of-points-in-opencv | |
# https://stackoverflow.com/questions/16538774/dealing-with-contours-and-bounding-rectangle-in-opencv-2-4-python-2-7 | |
# https://stackoverflow.com/questions/50308055/creating-bounding-boxes-for-contours | |
# https://stackoverflow.com/questions/57296398/how-can-i-get-better-results-of-bounding-box-using-find-contours-of-opencv | |
# http://amroamroamro.github.io/mexopencv/opencv/generalContours_demo1.html | |
# https://gist.github.com/bigsnarfdude/d811e31ee17495f82f10db12651ae82d | |
# http://man.hubwiz.com/docset/OpenCV.docset/Contents/Resources/Documents/da/d0c/tutorial_bounding_rects_circles.html | |
# https://www.analyticsvidhya.com/blog/2021/05/document-layout-detection-and-ocr-with-detectron2/ | |
# https://colab.research.google.com/drive/1m6gaQF6Q4M0IaSjoo_4jWllKJjK-i6fw?usp=sharing#scrollTo=lEyl3wYKHAe1 | |
# https://stackoverflow.com/questions/39403183/python-opencv-sorting-contours | |
# https://docs.opencv.org/2.4/doc/tutorials/imgproc/shapedescriptors/bounding_rects_circles/bounding_rects_circles.html | |
# https://www.pyimagesearch.com/2016/03/21/ordering-coordinates-clockwise-with-python-and-opencv/ | |
def bucket_sort(df, colmn, ymax_col="ymax", ymin_col="ymin"):
    """Group consecutive, vertically overlapping boxes into numbered lines.

    Rows are walked in the order they already have in ``df``; nothing is
    re-sorted here (``do_sorting`` orders by ``ymin``/``xmin`` before calling).
    The first row of a line fixes that line's bottom edge (its ``ymax``).
    Every following row whose ``ymin`` is strictly smaller than that edge
    joins the line; the first row that does not opens the next line.

    Args:
        df: DataFrame containing at least the columns listed in ``colmn``.
            It is not modified.
        colmn: Names of the columns to carry into the result. Must contain
            ``ymax_col`` and ``ymin_col``. The list is not modified.
        ymax_col: Name of the column holding each box's bottom edge.
        ymin_col: Name of the column holding each box's top edge.

    Returns:
        A new DataFrame with the columns of ``colmn`` followed by
        ``"line_number"`` (1-based), rows in the same order as ``df`` and a
        fresh default index. All columns share the single dtype of the
        intermediate NumPy array.

    Raises:
        KeyError: If a name in ``colmn`` is not a column of ``df``.
        ValueError: If ``ymax_col`` or ``ymin_col`` is not in ``colmn``.
    """
    # Work on private copies so neither the caller's list nor frame changes.
    columns = [name for name in colmn if name != "line_number"]
    columns.append("line_number")
    # to_numpy(copy=True) guarantees a writable array even when pandas would
    # otherwise hand back a read-only view.
    values = df[columns[:-1]].assign(line_number=0).to_numpy(copy=True)

    ymax = columns.index(ymax_col)
    ymin = columns.index(ymin_col)
    line_no = columns.index("line_number")

    total = len(values)
    start = 0
    line = 0
    while start < total:
        line += 1
        line_bottom = values[start, ymax]
        # The row that opens a line always belongs to it. Without this, a
        # zero-height box (ymax <= ymin) would never advance the scan.
        values[start, line_no] = line
        end = start + 1
        while end < total and line_bottom > values[end, ymin]:
            values[end, line_no] = line
            end += 1
        start = end
    return pd.DataFrame(values, columns=columns)
def do_sorting(df):
    """Return the boxes ordered line by line, left to right.

    The frame is ordered by ``ymin`` then ``xmin``, grouped into lines with
    ``bucket_sort``, and finally ordered by ``line_number`` then ``xmin``.

    Args:
        df: DataFrame with ``xmin``, ``ymin``, ``xmax`` and ``ymax`` columns.
            An existing ``line_number`` column is discarded and recomputed.
            The caller's frame is left untouched.

    Returns:
        A new DataFrame with a fresh default index, containing the input
        columns plus ``idx`` (the row's index label in ``df``) and
        ``line_number`` (as produced by ``bucket_sort``).
    """
    # sort_values without inplace returns a new frame, so the caller's data
    # is never reordered or extended.
    ordered = df.sort_values(["ymin", "xmin"], ascending=True)
    ordered["idx"] = ordered.index
    if "line_number" in ordered.columns:
        print("line number removed")
        ordered = ordered.drop("line_number", axis=1)

    req_colns = ["xmin", "ymin", "xmax", "ymax", "idx"]
    lines = bucket_sort(ordered.copy(), req_colns)

    # bucket_sort returns one row per input row in the same order, so the
    # line numbers can be attached by position. Joining on "idx" instead
    # would multiply rows whenever the incoming index has duplicate labels.
    ordered["line_number"] = lines["line_number"].to_numpy()

    ordered = ordered.sort_values(["line_number", "xmin"], ascending=True)
    return ordered.reset_index(drop=True)
def _xml_int(text):
    """Convert XML number text to int, accepting float spellings like "12.0"."""
    # Going through float() keeps plain integers unchanged and truncates any
    # fractional part instead of raising ValueError.
    return int(float(text))


def _size_value(size, tag, position):
    """Read one dimension from a <size> element, by tag name when available."""
    node = size.find(tag)
    if node is None:
        # Fall back to the child position for files that use other tag names.
        node = size[position]
    return _xml_int(node.text)


def xml_to_csv(xml_file):
    """Parse one annotation XML file into a DataFrame of bounding boxes.

    One row is produced per ``<object>`` element under the root. The
    ``<filename>`` and ``<size>`` values of the file are repeated on each row.

    Args:
        xml_file: Anything ``xml.etree.ElementTree.parse`` accepts (a path or
            a file object).

    Returns:
        DataFrame with columns ``filename``, ``width``, ``height``, ``cls``,
        ``xmin``, ``ymin``, ``xmax``, ``ymax``. It is empty, with the same
        columns, when the file has no ``<object>`` elements.

    Raises:
        xml.etree.ElementTree.ParseError: If the input is not well-formed XML.
        AttributeError: If a file with objects lacks ``<filename>``,
            ``<size>``, ``<name>`` or ``<bndbox>`` elements.
        ValueError: If a size or coordinate value is not numeric.
    """
    # Adapted from https://gist.github.com/rotemtam/88d9a4efae243fc77ed4a0f9917c8f6c
    column_name = [
        "filename",
        "width",
        "height",
        "cls",
        "xmin",
        "ymin",
        "xmax",
        "ymax",
    ]
    root = ET.parse(xml_file).getroot()
    objects = root.findall("object")
    if not objects:
        # Nothing to report; file-level fields are not required in this case.
        return pd.DataFrame([], columns=column_name)

    # File-level values are identical for every object, so read them once.
    filename = root.find("filename").text
    size = root.find("size")
    width = _size_value(size, "width", 0)
    height = _size_value(size, "height", 1)

    xml_list = []
    for member in objects:
        bbx = member.find("bndbox")
        xml_list.append(
            (
                filename,
                width,
                height,
                member.find("name").text,
                _xml_int(bbx.find("xmin").text),
                _xml_int(bbx.find("ymin").text),
                _xml_int(bbx.find("xmax").text),
                _xml_int(bbx.find("ymax").text),
            )
        )
    return pd.DataFrame(xml_list, columns=column_name)
# def annotate_planogram_compliance(img0, sorted_xml_df, wrong_indexes, target_names): | |
# # annotator = Annotator(img0, line_width=3, pil=True) | |
# det = sorted_xml_df[['xmin', 'ymin', 'xmax', 'ymax','cls']].values | |
# # det[:, :4] = scale_coords((640, 640), det[:, :4], img0.shape).round() | |
# for i, (*xyxy, cls) in enumerate(det): | |
# c = int(cls) # integer class | |
# if i in wrong_indexes: | |
# # print(xyxy, "Wrong detection", (255, 0, 0)) | |
# label = "Wrong detection" | |
# color = (0,0,255) | |
# else: | |
# # print(xyxy, label, (0, 255, 0)) | |
# label = f'{target_names[c]}' | |
# color = (0,255, 0) | |
# org = (int(xyxy[0]), int(xyxy[1]) ) | |
# top_left = org | |
# bottom_right = (int(xyxy[2]), int(xyxy[3])) | |
# # print("#"*50) | |
# # print(f"Anooatting cv2 rectangle with shape: { img0.shape}, top left: { top_left}, bottom right: { bottom_right} , color : { color }, thickness: {3}, cv2.LINE_8") | |
# # print("#"*50) | |
# cv2.rectangle(img0, top_left, bottom_right , color, 3, cv2.LINE_8) | |
# cv2.putText(img0, label, tuple(org), cv2. FONT_HERSHEY_SIMPLEX , 0.5, color) | |
# return img0 | |
def _draw_detections(img0, sorted_df, indexes, target_names, color):
    """Draw one labelled rectangle on ``img0`` per (line, position) pair."""
    for line_idx, position in zip(*indexes):
        try:
            # line_number is 1-based while the incoming line indexes are
            # 0-based; `position` selects the box within that line.
            row = sorted_df[sorted_df["line_number"] == line_idx + 1].iloc[
                position
            ]
            label = f'{target_names[row["cls"]]}'
            # OpenCV expects plain Python ints for point coordinates; values
            # pulled from a pandas row are NumPy scalars and may be rejected.
            top_left = (int(row["xmin"]), int(row["ymin"]))
            bottom_right = (int(row["xmax"]), int(row["ymax"]))
            cv2.rectangle(img0, top_left, bottom_right, color, 3, cv2.LINE_8)
            cv2.putText(
                img0, label, top_left, cv2.FONT_HERSHEY_SIMPLEX, 0.5, color
            )
        except Exception as e:
            # Best effort: report the failing box and carry on with the rest.
            print("Error: " + str(e))
            continue


def annotate_planogram_compliance(
    img0, sorted_df, correct_indexes, wrong_indexes, target_names
):
    """Draw correct and wrong detections onto ``img0``.

    Args:
        img0: Image array handed to ``cv2.rectangle`` / ``cv2.putText``; it
            is drawn on in place.
        sorted_df: DataFrame with ``xmin``, ``ymin``, ``xmax``, ``ymax``,
            ``cls`` and a 1-based ``line_number`` column.
        correct_indexes: Pair of equal-length sequences
            ``(line_indexes, positions)``; each pair picks the box at
            ``positions[i]`` within line ``line_indexes[i] + 1``. Drawn with
            colour ``(0, 255, 0)``.
        wrong_indexes: Same layout as ``correct_indexes``. Drawn with colour
            ``(0, 0, 255)`` (red, assuming a BGR image -- TODO confirm).
        target_names: Mapping or sequence indexed by the ``cls`` value to
            obtain the label text.

    Returns:
        The same ``img0`` object, after drawing.

    Boxes that cannot be resolved or drawn are reported with ``print`` and
    skipped; they never raise.
    """
    _draw_detections(img0, sorted_df, correct_indexes, target_names, (0, 255, 0))
    _draw_detections(img0, sorted_df, wrong_indexes, target_names, (0, 0, 255))
    return img0