Spaces:

aico
/

TrOCR-digit

Sleeping

App Files Files Community

aico commited on Jun 28, 2022

Commit

f4d5676

•

1 Parent(s): e2823b0

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -8

app.py CHANGED Viewed

@@ -7,26 +7,92 @@ import cv2
 processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
 model = VisionEncoderDecoderModel.from_pretrained("aico/TrOCR-MNIST")
-def process_image(image):
     #print(np.shape(image))
     #print(image)
     #rint(image.astype('uint8'))
     #cv2.imwrite("image.png",image.astype('uint8'),(28, 28))
-    img = Image.fromarray(image.astype('uint8')).convert("RGB")
     #img = Image.open("image.png").convert("RGB")
-    print(img)
     # prepare image
-    pixel_values = processor(img, return_tensors="pt").pixel_values
     # generate (no beam search)
-    generated_ids = model.generate(pixel_values)
     # decode
-    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    return generated_text
 title = "Interactive demo: Single Digits MNIST"
 description = "Aico - University Utrecht"

 processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
 model = VisionEncoderDecoderModel.from_pretrained("aico/TrOCR-MNIST")
+def _group_rectangles(rec):
+    """
+    Uion intersecting rectangles.
+    Args:
+        rec - list of rectangles in form [x, y, w, h]
+    Return:
+        list of grouped ractangles
+    """
+    tested = [False for i in range(len(rec))]
+    final = []
+    i = 0
+    while i < len(rec):
+        if not tested[i]:
+            j = i+1
+            while j < len(rec):
+                if not tested[j] and intersect_area(rec[i], rec[j]):
+                    rec[i] = union(rec[i], rec[j])
+                    tested[j] = True
+                    j = i
+                j += 1
+            final += [rec[i]]
+        i += 1
+    return final
+def process_image(image):
+    bounding_boxes = []
+    generated_text_list = []
+    #boundingBoxes_2 = []
     #print(np.shape(image))
     #print(image)
+    #dim = (28,28)
+    #resized = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
     #rint(image.astype('uint8'))
     #cv2.imwrite("image.png",image.astype('uint8'),(28, 28))
+    #mask = np.zeros(np.shape(image), dtype=np.uint8)
+    thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
+    #gray = cv2.cvtColor(thresh, cv2.COLOR_BGR2GRAY)
+    cnts = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
+    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
+    (cnts, _) = contours.sort_contours(cnts, method="left-to-right")
+    dim = (28, 28)
+    for c in cnts:
+        area = cv2.contourArea(c)
+        #print(area)
+        #if area < 120:
+        bounding_boxes.append(cv2.boundingRect(c))
+        print("for loop bb: ",bounding_boxes)
+    boundingBoxes_filter =  [i for i in bounding_boxes if i != (0 , 0, 128, 128)]
+    boundingBoxes = _group_rectangles(boundingBoxes_filter)
+    #print(boundingBoxes)
+    #
+    #print(boundingBoxes_2)
+    for (x, y, w, h) in boundingBoxes:
+        print(x,y,w,h)
+        ROI = thresh[y:y+h, x:x+w]
+        ROI2 = cv2.bitwise_not(ROI)
+        borderoutput = cv2.copyMakeBorder(ROI2, 30, 30, 30, 30, cv2.BORDER_CONSTANT, value=[0, 0, 0])
+        resized = cv2.resize(borderoutput, dim, interpolation = cv2.INTER_AREA)
+        cv2.imwrite('ROI_{}.png'.format(x), resized)
+        #imageinv = cv2.bitwise_not(resized)
+        img = Image.fromarray(resized.astype('uint8')).convert("RGB")
+        pixel_values = processor(img, return_tensors="pt").pixel_values
+        generated_ids = model.generate(pixel_values)
+        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+        print(generated_text)
+        generated_text_list.append(generated_text)
+    #img = Image.fromarray(image.astype('uint8')).convert("RGB")
     #img = Image.open("image.png").convert("RGB")
+    #print(img)
     # prepare image
+    #pixel_values = processor(img, return_tensors="pt").pixel_values
     # generate (no beam search)
+    #generated_ids = model.generate(pixel_values)
     # decode
+    #generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return ''.join(generated_text_list)
+    #return generated_text
 title = "Interactive demo: Single Digits MNIST"
 description = "Aico - University Utrecht"