whoami02 committed on
Commit
a049953
1 Parent(s): f393cf4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -32
app.py CHANGED
@@ -60,9 +60,9 @@ def get_vals(file_path, wh):
60
 
61
  def clean_dir(path):
62
  files = os.listdir(path=path)
63
- return files
64
- # for i in range(1,len(files)+1):
65
- # os.remove(f"{path}/{i}.jpg")
66
 
67
  def html_path(img, counter):
68
  img.save(f"{sub_img_temp}/{counter}.jpg")
@@ -72,43 +72,44 @@ def create_box(l): # l represents the bounds of box
72
  return (l[0], l[2], l[1], l[3])
73
 
74
  def process(filepath, regex, size=(1656,1170)):
75
- f1 = clean_dir(path=img_temp)
76
- f2 = clean_dir(path=sub_img_temp)
77
- return [f1, f2]
78
- # img = Image.open(filepath)
79
- # (width, height), parts, counter, dimensions, im_, values = img.size, [], 0, [], [], []
80
- # for i in range(0, width, size[0]):
81
- # for j in range(0, height, size[1]):
82
- # counter += 1
83
- # box = (i, j, i+size[0], j+size[1])
84
- # img.crop(box).save(f"{img_temp}/{counter}.jpg")
85
- # parts.append(img.crop(box))
86
- # temp= os.listdir(path=img_temp) # temp represents a temporary variable that contains directory information
87
- # if regex == 'Regex-1':
88
- # pattern = re.compile(r"^\s\b\d+([\.,]\d+)?")
89
- # else:
90
- # pattern = re.compile(r"\d+")
91
 
92
- # data = get_vals(img_temp, wh=math.floor(math.sqrt(len(temp))))
93
- # counter, idx = 1, []
94
- # for d in data:
95
- # dimensions.append(ast.literal_eval(d.split(':')[0]))
96
- # im_.append(html_path(img.crop(create_box(ast.literal_eval(d.split(':')[0]))), counter=counter))
97
- # values.append(d.split(':')[1])
98
- # counter += 1
99
- # metadata = pd.DataFrame(zip(dimensions, im_, values), columns=['Coordinates','Image','Value'])
100
- # df = metadata[metadata['Value'].str.contains(pattern)] #[img.size] moreover df is a chunk taken from metadata which contains the regex pattern.
101
 
102
- # return df#.to_markdown()
103
 
104
  def main():
105
 
106
  demo = gr.Interface(
107
  fn=process,
108
  inputs=[gr.Image(type="filepath", interactive=True),gr.Dropdown(['Regex-1'])],
109
- # outputs=gr.DataFrame(wrap=True, datatype = ["str", "markdown", "str"], interactive=True),
110
- outputs = "list",
111
- title="OCR"
 
112
  )
113
  demo.launch(debug=True, show_error=True)
114
 
 
60
 
def clean_dir(path):
    """Delete every ``.jpg`` file directly inside *path*.

    The previous implementation removed ``1.jpg`` .. ``n.jpg`` where ``n`` was
    the number of directory entries.  That only works when the directory holds
    exactly those consecutively numbered files; any extra entry or a gap in the
    numbering raised ``FileNotFoundError`` or left stale images behind.  This
    version removes the image files that are actually present.

    Args:
        path: Directory to clean.  It must exist; sub-directories and
            non-``.jpg`` files are left untouched.

    Returns:
        None.
    """
    for name in os.listdir(path):
        target = os.path.join(path, name)
        # Only touch regular image files; skip directories and anything else
        # that happens to live in the folder.
        if name.lower().endswith(".jpg") and os.path.isfile(target):
            os.remove(target)
66
 
67
  def html_path(img, counter):
68
  img.save(f"{sub_img_temp}/{counter}.jpg")
 
72
  return (l[0], l[2], l[1], l[3])
73
 
def process(filepath, regex, size=(1656,1170)):
    """Tile an image, run ``get_vals`` on the tiles and tabulate the matches.

    Steps:
      1. Empty the two scratch directories (``img_temp``, ``sub_img_temp``).
      2. Cut the image at *filepath* into ``size``-sized tiles and save them
         as ``<img_temp>/<n>.jpg`` (numbered from 1, column by column).
      3. Call ``get_vals`` on the tile directory.  Each returned record is
         parsed as ``"<python literal>:<text>"`` -- NOTE(review): this format
         is inferred from how the records are split here; confirm against
         ``get_vals``.
      4. For each record, crop the referenced region from the full image,
         store it through ``html_path`` and collect coordinates/image/value.
      5. Keep only the rows whose value matches the selected pattern.

    Args:
        filepath: Path of the image to open with ``Image.open``.
        regex: Pattern selector; ``'Regex-1'`` picks the decimal-number
            pattern, anything else falls back to ``\\d+``.
        size: ``(width, height)`` of one tile in pixels.

    Returns:
        A ``pandas.DataFrame`` with columns ``Coordinates``, ``Image`` and
        ``Value`` restricted to rows whose ``Value`` contains the pattern.
    """
    # Start from empty scratch folders so leftovers from an earlier run are
    # not picked up by the directory listing below.
    clean_dir(path=img_temp)
    clean_dir(path=sub_img_temp)

    img = Image.open(filepath)
    width, height = img.size
    tile_w, tile_h = size[0], size[1]

    counter = 0
    for i in range(0, width, tile_w):
        for j in range(0, height, tile_h):
            counter += 1
            box = (i, j, i + tile_w, j + tile_h)
            # Crop once and write the tile straight to disk.
            img.crop(box).save(f"{img_temp}/{counter}.jpg")

    # Directory listing of the tile folder; only its length is used.
    temp = os.listdir(path=img_temp)

    if regex == 'Regex-1':
        pattern = re.compile(r"^\s\b\d+([\.,]\d+)?")
    else:
        pattern = re.compile(r"\d+")

    data = get_vals(img_temp, wh=math.floor(math.sqrt(len(temp))))

    dimensions, im_, values = [], [], []
    for counter, d in enumerate(data, start=1):
        # Split and evaluate each record once instead of repeating the work
        # for every column.
        fields = d.split(':')
        coords = ast.literal_eval(fields[0])
        dimensions.append(coords)
        im_.append(html_path(img.crop(create_box(coords)), counter=counter))
        values.append(fields[1])

    metadata = pd.DataFrame(
        zip(dimensions, im_, values),
        columns=['Coordinates', 'Image', 'Value'],
    )
    # df is the subset of metadata whose Value matches the chosen pattern.
    df = metadata[metadata['Value'].str.contains(pattern)]

    return df
103
 
def main():
    """Build the Gradio interface around ``process`` and launch it."""
    # Inputs: the uploaded image (handed to ``process`` as a file path) and
    # the pattern selector.
    image_input = gr.Image(type="filepath", interactive=True)
    regex_input = gr.Dropdown(['Regex-1'])

    # Output: an editable table; the second column is rendered as markdown.
    result_table = gr.DataFrame(
        wrap=True,
        datatype=["str", "markdown", "str"],
        interactive=True,
    )

    demo = gr.Interface(
        fn=process,
        inputs=[image_input, regex_input],
        outputs=result_table,
        title="OCR",
        description="Issue with filesystem...not able to parse all files in the folders",
    )
    demo.launch(debug=True, show_error=True)
115