# coding=utf-8
"""Gradio demo: recognise a handwritten Bangla word with an SSD detector.

At import time this module loads a TensorFlow Object Detection API model from
``Tensorflow/workspace``, builds the Gradio interface and launches it.

Public entry points:

* ``detect_fn``     - graph-compiled forward pass of the detector.
* ``detect_frame``  - recognise the characters in an image array.
* ``model_predict`` - Gradio callback: recognised text + annotated image.
"""
import logging
import os

import cv2
import gradio as gr
import numpy as np
import tensorflow as tf
from object_detection.builders import model_builder
from object_detection.utils import config_util
from object_detection.utils import label_map_util
from PIL import ImageFont, ImageDraw, Image

logger = logging.getLogger(__name__)

# Font used to draw the recognised character on top of each detection box.
font = ImageFont.truetype("kalpurush.ttf", 60)

citation_text = """

This is a demo space for the paper: Improving Character Recognition in Bangla Handwritten Words: A Two-Stage Single Shot Detector Approach.

Direct Link to the Paper: https://ieeexplore.ieee.org/document/10499463

ResearchGate Link to the Paper: https://bit.ly/ResearchGatePaperLink

Please cite this paper with BibTeX as follows:

@INPROCEEDINGS{10499463,
  author={Pal, Avi and Hasan, Md. Sajid and Masudul Ahsan, Sk. Md.},
  booktitle={2024 International Conference on Advances in Computing, Communication, Electrical, and Smart Systems (iCACCESS)}, 
  title={Improving Character Recognition in Bangla Handwritten Words: A Two-Stage Single Shot Detector Approach}, 
  year={2024},
  volume={},
  number={},
  pages={1-6},
  keywords={YOLO;Deep learning;Image segmentation;Handwriting recognition;Image recognition;Optical character recognition;Transfer learning;Machine learning;Image Processing and Computer Vision;Optical character recognition;Tensorflow;MobileNetV2;Convolutional Neural Network;Single Shot Detector},
  doi={10.1109/iCACCESS61735.2024.10499463}}
"""

# ---------------------------------------------------------------------------
# Model locations
# ---------------------------------------------------------------------------
WORKSPACE_PATH = 'Tensorflow/workspace'
ANNOTATION_PATH = WORKSPACE_PATH+'/annotations'
MODEL_PATH = WORKSPACE_PATH+'/models'
PRETRAINED_MODEL_PATH = WORKSPACE_PATH+'/pre-trained-models'
CONFIG_PATH = MODEL_PATH+'/my_ssd_mobnet/pipeline.config'
CHECKPOINT_PATH = MODEL_PATH+'/my_ssd_mobnet/'

# Load pipeline config and build a detection model.
configs = config_util.get_configs_from_pipeline_file(CONFIG_PATH)
detection_model = model_builder.build(model_config=configs['model'], is_training=False)

# Restore checkpoint.
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(os.path.join(CHECKPOINT_PATH, 'ckpt-51')).expect_partial()

# ---------------------------------------------------------------------------
# Recognition settings and lookup tables
# ---------------------------------------------------------------------------
# Second-stage verification switch; 0 disables it. Enabling it requires a
# module-level `verify(image) -> (label, confidence)` callable, e.g.
#   from myverify import verify
avver=0
clicked=1
wp = None
bp = None
category_index = label_map_util.create_category_index_from_labelmap(ANNOTATION_PATH+'/label_map.pbtxt')

# Character emitted for each detector class id (list index == class id).
classes=['অ','ই','উ','এ','ও','ক','খ','গ','ঘ','চ','ছ','জ','ঝ','ট','ঠ','ড','ত','থ','দ','ধ','ন','প','ফ','ব','ভ','ম','য','র','ল','শ','ষ','স','হ','ড়','য়','ৎ','ং','ঁ','০','১','২','৩','৪','৫','৭','৮','া','ি','ী','ে','ু','্র','্য']
# Not read anywhere in this file; presumably the verifier's label ids (TODO confirm).
labels=[1,2,4,7,9,11,12,13,14,16,17,18,19,21,22,23,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,45,46,47,49,50,51,52,53,54,55,57,58,60,61,62,63,64,66,67]
# Verifier label -> index into `classes`; -1 means "no corresponding class".
ulta=[0,-1,1,-1,2,-1,-1,3,-1,4,-1,5,6,7,8,-1,9,10,11,12,-1,13,14,15,-1,-1,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,-1,34,35,36,-1,37,38,39,40,41,42,43,-1,44,45,-1,46,47,48,49,50,-1,51,52]

# Only this many leading detections are ever considered.
_MAX_CANDIDATES = 15
# Detections scoring below this are ignored.
_MIN_SCORE = 0.2
# A box overlapping an already accepted box by at least this IoU is dropped.
_MAX_IOU = 0.5
# Detections with a class id above this are never sent to the verifier.
_MAX_VERIFIABLE_CLASS = 38
# A box of one of these classes is never suppressed by (and never suppresses)
# a box of a different class.
_OVERLAP_EXEMPT = ('ি', 'ী')
# Signs that may have to be moved behind the following letter because their
# box can start left of the letter they belong to.
_ATTACHED_SIGNS = ('ু', '্র')
# Signs that are swapped with the letter that follows them in box order.
_PREFIX_SIGNS = ('ে', 'ি')


@tf.function
def detect_fn(image):
    """Run preprocess, predict and postprocess of the detector on a batch.

    Args:
        image: float32 tensor holding a batch of images.

    Returns:
        The dictionary produced by ``detection_model.postprocess``.
    """
    image, shapes = detection_model.preprocess(image)
    prediction_dict = detection_model.predict(image, shapes)
    detections = detection_model.postprocess(prediction_dict, shapes)
    return detections


def _unbatch_detections(detections):
    """Convert raw detector output into per-image NumPy arrays.

    Removes the batch dimension, truncates every entry to ``num_detections``
    rows and casts the class ids to ``int64`` so they can be used as indices.
    """
    num_detections = int(detections.pop('num_detections'))
    unbatched = {key: value[0, :num_detections].numpy()
                 for key, value in detections.items()}
    unbatched['num_detections'] = num_detections
    # detection_classes should be ints.
    unbatched['detection_classes'] = unbatched['detection_classes'].astype(np.int64)
    return unbatched


def _iou(box_a, box_b):
    """Return intersection-over-union of two boxes, 0.0 when they are disjoint."""
    lo0 = max(box_a[0], box_b[0])
    lo1 = max(box_a[1], box_b[1])
    hi0 = min(box_a[2], box_b[2])
    hi1 = min(box_a[3], box_b[3])
    if lo0 > hi0 or lo1 > hi1:
        return 0.0
    common = (hi0 - lo0) * (hi1 - lo1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union = area_a + area_b - common
    if union <= 0:
        # Both boxes are degenerate; treat as "no overlap" instead of 0/0.
        return 0.0
    return common / union


def _suppress_overlaps(detections):
    """Pick the detections to keep among the first ``_MAX_CANDIDATES``.

    Candidates are visited in detector order. A candidate is kept when its
    score reaches ``_MIN_SCORE`` and it does not overlap an already kept box
    with IoU >= ``_MAX_IOU``. Pairs of different classes where either one is
    in ``_OVERLAP_EXEMPT`` are never compared.

    Returns:
        A list of booleans, one per considered candidate.
    """
    boxes = detections['detection_boxes']
    scores = detections['detection_scores']
    class_ids = detections['detection_classes']
    # The detector may return fewer rows than the candidate cap.
    limit = min(_MAX_CANDIDATES, len(scores))
    keep = [False] * limit
    for i in range(limit):
        if scores[i] < _MIN_SCORE:
            continue
        glyph = classes[class_ids[i]]
        suppressed = False
        for j in range(i):
            if not keep[j]:
                continue
            other = classes[class_ids[j]]
            if glyph != other and (glyph in _OVERLAP_EXEMPT or other in _OVERLAP_EXEMPT):
                continue
            if _iou(boxes[i], boxes[j]) >= _MAX_IOU:
                logger.debug("dropping %s (overlaps kept %s)", glyph, other)
                suppressed = True
                break
        keep[i] = not suppressed
    return keep


def _apply_verifier(detections, keep, image):
    """Let the optional second-stage verifier overrule kept detections.

    Does nothing while ``avver`` is 0. Otherwise each kept detection whose
    class id is at most ``_MAX_VERIFIABLE_CLASS`` is cropped from ``image``
    and passed to ``verify``; when the verifier is more confident than the
    detector and maps (through ``ulta``) to a different valid class, class and
    score are overwritten in ``detections``.
    """
    if avver == 0:
        return
    boxes = detections['detection_boxes']
    scores = detections['detection_scores']
    class_ids = detections['detection_classes']
    rows, cols = image.shape[:2]
    snapshot = image.copy()
    for i, kept in enumerate(keep):
        if not kept or class_ids[i] > _MAX_VERIFIABLE_CLASS:
            continue
        # Box entries 0/2 are scaled by the row count, 1/3 by the column count.
        r0 = int(boxes[i][0] * rows)
        c0 = int(boxes[i][1] * cols)
        r1 = int(boxes[i][2] * rows)
        c1 = int(boxes[i][3] * cols)
        label, conf = verify(snapshot[r0:r1, c0:c1])
        mapped = ulta[label]
        if conf > scores[i] and mapped != class_ids[i] and mapped != -1:
            class_ids[i] = mapped
            scores[i] = conf


def _compose_word(detections, keep):
    """Turn the kept detections into an ordered list of characters.

    Letters are ordered by box entry 1 (the start of the box along the column
    axis), then three fix-ups are applied from right to left: attached signs
    are moved behind the next letter when they overlap some letter other than
    their predecessor most, prefix signs are swapped with the following
    letter, and 'অ' followed by 'া' is merged into 'আ'.
    """
    boxes = detections['detection_boxes']
    scores = detections['detection_scores']
    class_ids = detections['detection_classes']

    letters = []
    for i, kept in enumerate(keep):
        if not kept or scores[i] < _MIN_SCORE:
            continue
        letters.append((boxes[i][1], classes[class_ids[i]], boxes[i][3]))
    letters.sort(key=lambda item: item[0])

    for i in range(len(letters) - 1, -1, -1):
        start, glyph, end = letters[i]
        if glyph not in _ATTACHED_SIGNS:
            continue
        best_overlap = 0
        best = i - 1
        for j, (other_start, _, other_end) in enumerate(letters):
            if j == i:
                continue
            overlap = min(other_end, end) - max(start, other_start)
            if overlap > best_overlap:
                best_overlap = overlap
                best = j
        # Guard on i + 1: the sign may already be the last letter, in which
        # case there is nothing to swap it with.
        if best != i - 1 and i + 1 < len(letters):
            letters[i], letters[i + 1] = letters[i + 1], letters[i]

    glyphs = [item[1] for item in letters]

    for i in range(len(glyphs) - 2, -1, -1):
        if glyphs[i] in _PREFIX_SIGNS:
            glyphs[i], glyphs[i + 1] = glyphs[i + 1], glyphs[i]

    for i in range(len(glyphs) - 2, -1, -1):
        if glyphs[i] == 'অ' and glyphs[i + 1] == 'া':
            glyphs[i] = 'আ'
            glyphs.pop(i + 1)

    return glyphs


def _recognise(detections, image):
    """Run suppression, optional verification and word composition.

    Returns:
        ``(glyphs, keep)`` - the recognised characters in reading order and
        the per-candidate keep flags.
    """
    keep = _suppress_overlaps(detections)
    _apply_verifier(detections, keep, image)
    return _compose_word(detections, keep), keep


def _annotate(image, detections, keep):
    """Return a copy of ``image`` with every kept detection drawn on it.

    Each kept box gets a translucent fill, an outline and its character
    rendered on a coloured label at the box's top-left corner.
    """
    canvas = Image.fromarray(image)
    if not any(keep):
        return np.asarray(canvas)

    boxes = detections['detection_boxes']
    class_ids = detections['detection_classes']
    rows, cols = image.shape[:2]
    draw = ImageDraw.Draw(canvas, "RGBA")
    for i, kept in enumerate(keep):
        if not kept:
            continue
        # PIL points are (x, y): column-axis entries (1, 3) come first.
        corner = (boxes[i][1] * cols, boxes[i][0] * rows)
        opposite = (boxes[i][3] * cols, boxes[i][2] * rows)
        region = [corner, opposite]
        glyph = classes[class_ids[i]]
        draw.rectangle(region, fill=(0, 100, 200, 127))
        draw.rectangle(region, outline=(0, 0, 0, 127), width=3)
        draw.rectangle(draw.textbbox(corner, glyph, font=font), fill=(200, 100, 0, 200))
        draw.text(corner, glyph, font=font, fill=(0, 0, 0, 255))
    return np.asarray(canvas)


def detect_frame(frame,isRealTime = False):
    """Recognise the characters of one word image.

    Args:
        frame: image array with three dimensions (rows, columns, channels).
        isRealTime: accepted for backward compatibility; not used.

    Returns:
        List of recognised characters in reading order.
    """
    image_np = np.array(frame)
    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
    detections = _unbatch_detections(detect_fn(input_tensor))
    glyphs, _ = _recognise(detections, image_np)
    logger.debug("recognised %s", glyphs)
    return glyphs


def model_predict(word):
    """Gradio callback: recognise the word drawn on the canvas.

    The image is denoised, reduced to its first channel, binarised with an
    adaptive Gaussian threshold and passed through the detector.

    Args:
        word: image array from the sketch pad, or ``None`` when nothing was
            submitted. The array is not modified.

    Returns:
        ``(text, image)`` - the recognised word and the denoised input with
        the accepted detections drawn on it; ``("", None)`` for ``None`` input.
    """
    if word is None:
        return '', None

    # Denoise into a fresh array so the caller's image is left untouched.
    denoised = cv2.fastNlMeansDenoising(word, None, 50.0, 7, 21)
    gray = denoised[:, :, 0] if len(denoised.shape) == 3 else denoised
    binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 11, 2)
    frame = cv2.cvtColor(binary, cv2.COLOR_GRAY2BGR)

    input_tensor = tf.convert_to_tensor(np.expand_dims(frame, 0), dtype=tf.float32)
    image_t, shapes = detection_model.preprocess(input_tensor)
    prediction_dict = detection_model.predict(image_t, shapes)
    detections = _unbatch_detections(detection_model.postprocess(prediction_dict, shapes))

    glyphs, keep = _recognise(detections, frame)
    output = ''.join(glyphs)
    newWordImg = _annotate(denoised, detections, keep)
    return output, newWordImg


# ---------------------------------------------------------------------------
# Gradio application
# ---------------------------------------------------------------------------
HF_TOKEN = os.getenv("SECRET_TOKEN")
hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "word-flag-data")
demo = gr.Interface(
    fn=model_predict,
    inputs= "paint",
    outputs=["text","image"],
    # title= "Bangla Word OCR",
    description="Reduce pen ink size from Pen Icon(🖋️) for better results!",
    examples=[
        ["Tensorflow/workspace/images/tmpbvc06xxf.png"],
        ["Tensorflow/workspace/images/tmpfhin6fzg.png"],
    ],
    article=citation_text,
    allow_flagging="auto",
    flagging_callback=hf_writer,
)
demo.launch()