Spaces:

whoami02
/

ocr_vst

Sleeping

App Files Community

whoami02 committed on Feb 13, 2024

Commit

a049953

verified ·

1 Parent(s): f393cf4

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -32

app.py CHANGED Viewed

@@ -60,9 +60,9 @@ def get_vals(file_path, wh):
 def clean_dir(path):
     files = os.listdir(path=path)
-    return files
-    # for i in range(1,len(files)+1):
-    #   os.remove(f"{path}/{i}.jpg")
 def html_path(img, counter):
     img.save(f"{sub_img_temp}/{counter}.jpg")
@@ -72,43 +72,44 @@ def create_box(l):  # l represents the bounds of box
     return (l[0], l[2], l[1], l[3])
 def process(filepath, regex, size=(1656,1170)):
-    f1 = clean_dir(path=img_temp)
-    f2 = clean_dir(path=sub_img_temp)
-    return [f1, f2]
-    # img = Image.open(filepath)
-    # (width, height), parts, counter, dimensions, im_, values = img.size, [], 0, [], [], []
-    # for i in range(0, width, size[0]):
-    #     for j in range(0, height, size[1]):
-    #         counter += 1
-    #         box = (i, j, i+size[0], j+size[1])
-    #         img.crop(box).save(f"{img_temp}/{counter}.jpg")
-    #         parts.append(img.crop(box))
-    # temp= os.listdir(path=img_temp) # temp represents a temporary variable that contains directory information
-    # if regex == 'Regex-1':
-    #     pattern = re.compile(r"^\s\b\d+([\.,]\d+)?")
-    # else:
-    #     pattern = re.compile(r"\d+")
-    # data = get_vals(img_temp, wh=math.floor(math.sqrt(len(temp))))
-    # counter, idx = 1, []
-    # for d in data:
-    #     dimensions.append(ast.literal_eval(d.split(':')[0]))
-    #     im_.append(html_path(img.crop(create_box(ast.literal_eval(d.split(':')[0]))), counter=counter))
-    #     values.append(d.split(':')[1])
-    #     counter += 1
-    # metadata = pd.DataFrame(zip(dimensions, im_, values), columns=['Coordinates','Image','Value'])
-    # df =  metadata[metadata['Value'].str.contains(pattern)]  #[img.size] moreover df is a chunk taken from metadata which contains the regex pattern.
-    # return df#.to_markdown()
 def main():
     demo = gr.Interface(
         fn=process,
         inputs=[gr.Image(type="filepath", interactive=True),gr.Dropdown(['Regex-1'])],
-        # outputs=gr.DataFrame(wrap=True, datatype = ["str", "markdown", "str"], interactive=True),
-        outputs = "list",
-        title="OCR"
     )
     demo.launch(debug=True, show_error=True)

 def clean_dir(path):
     files = os.listdir(path=path)
+    # return files
+    for i in range(1,len(files)+1):
+      os.remove(f"{path}/{i}.jpg")
 def html_path(img, counter):
     img.save(f"{sub_img_temp}/{counter}.jpg")
     return (l[0], l[2], l[1], l[3])
 def process(filepath, regex, size=(1656,1170)):
+    clean_dir(path=img_temp)
+    clean_dir(path=sub_img_temp)
+    # return [f1, f2]
+    img = Image.open(filepath)
+    (width, height), parts, counter, dimensions, im_, values = img.size, [], 0, [], [], []
+    for i in range(0, width, size[0]):
+        for j in range(0, height, size[1]):
+            counter += 1
+            box = (i, j, i+size[0], j+size[1])
+            img.crop(box).save(f"{img_temp}/{counter}.jpg")
+            parts.append(img.crop(box))
+    temp= os.listdir(path=img_temp) # temp represents a temporary variable that contains directory information
+    if regex == 'Regex-1':
+        pattern = re.compile(r"^\s\b\d+([\.,]\d+)?")
+    else:
+        pattern = re.compile(r"\d+")
+    data = get_vals(img_temp, wh=math.floor(math.sqrt(len(temp))))
+    counter, idx = 1, []
+    for d in data:
+        dimensions.append(ast.literal_eval(d.split(':')[0]))
+        im_.append(html_path(img.crop(create_box(ast.literal_eval(d.split(':')[0]))), counter=counter))
+        values.append(d.split(':')[1])
+        counter += 1
+    metadata = pd.DataFrame(zip(dimensions, im_, values), columns=['Coordinates','Image','Value'])
+    df =  metadata[metadata['Value'].str.contains(pattern)]  #[img.size] moreover df is a chunk taken from metadata which contains the regex pattern.
+    return df#.to_markdown()
 def main():
     demo = gr.Interface(
         fn=process,
         inputs=[gr.Image(type="filepath", interactive=True),gr.Dropdown(['Regex-1'])],
+        outputs=gr.DataFrame(wrap=True, datatype = ["str", "markdown", "str"], interactive=True),
+        # outputs = "list",
+        title="OCR",
+        description="Issue with filesystem...not able to parse all files in the folders",
     )
     demo.launch(debug=True, show_error=True)