mscsasem3 committed on
Commit
0a79cfc
1 Parent(s): 3fb6273

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +226 -238
app.py CHANGED
@@ -31,194 +31,194 @@ from sklearn.feature_extraction.text import TfidfVectorizer
31
 
32
 
33
 
34
- # processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
35
- # model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
36
- # plt.switch_backend('Agg')
37
- # def horizontal_projections(sobel_image):
38
- # return np.sum(sobel_image, axis=1)
39
 
40
 
41
- # def find_peak_regions(hpp, divider=4):
42
- # threshold = (np.max(hpp)-np.min(hpp))/divider
43
- # peaks = []
44
 
45
- # for i, hppv in enumerate(hpp):
46
- # if hppv < threshold:
47
- # peaks.append([i, hppv])
48
- # return peaks
49
-
50
- # def heuristic(a, b):
51
- # return (b[0] - a[0]) ** 2 + (b[1] - a[1]) ** 2
52
-
53
- # def get_hpp_walking_regions(peaks_index):
54
- # hpp_clusters = []
55
- # cluster = []
56
- # for index, value in enumerate(peaks_index):
57
- # cluster.append(value)
58
-
59
- # if index < len(peaks_index)-1 and peaks_index[index+1] - value > 1:
60
- # hpp_clusters.append(cluster)
61
- # cluster = []
62
-
63
- # #get the last cluster
64
- # if index == len(peaks_index)-1:
65
- # hpp_clusters.append(cluster)
66
- # cluster = []
67
 
68
- # return hpp_clusters
69
 
70
- # def astar(array, start, goal):
71
 
72
- # neighbors = [(0,1),(0,-1),(1,0),(-1,0),(1,1),(1,-1),(-1,1),(-1,-1)]
73
- # close_set = set()
74
- # came_from = {}
75
- # gscore = {start:0}
76
- # fscore = {start:heuristic(start, goal)}
77
- # oheap = []
78
 
79
- # heappush(oheap, (fscore[start], start))
80
 
81
- # while oheap:
82
-
83
- # current = heappop(oheap)[1]
84
-
85
- # if current == goal:
86
- # data = []
87
- # while current in came_from:
88
- # data.append(current)
89
- # current = came_from[current]
90
- # return data
91
-
92
- # close_set.add(current)
93
- # for i, j in neighbors:
94
- # neighbor = current[0] + i, current[1] + j
95
- # tentative_g_score = gscore[current] + heuristic(current, neighbor)
96
- # if 0 <= neighbor[0] < array.shape[0]:
97
- # if 0 <= neighbor[1] < array.shape[1]:
98
- # if array[neighbor[0]][neighbor[1]] == 1:
99
- # continue
100
- # else:
101
- # # array bound y walls
102
- # continue
103
- # else:
104
- # # array bound x walls
105
- # continue
106
 
107
- # if neighbor in close_set and tentative_g_score >= gscore.get(neighbor, 0):
108
- # continue
109
 
110
- # if tentative_g_score < gscore.get(neighbor, 0) or neighbor not in [i[1]for i in oheap]:
111
- # came_from[neighbor] = current
112
- # gscore[neighbor] = tentative_g_score
113
- # fscore[neighbor] = tentative_g_score + heuristic(neighbor, goal)
114
- # heappush(oheap, (fscore[neighbor], neighbor))
115
 
116
- # return []
117
-
118
- # def get_binary(img):
119
- # mean = np.mean(img)
120
- # if mean == 0.0 or mean == 1.0:
121
- # return img
122
-
123
- # thresh = threshold_otsu(img)
124
- # binary = img <= thresh
125
- # binary = binary*1
126
- # return binary
127
-
128
- # def path_exists(window_image):
129
- # #very basic check first then proceed to A* check
130
- # if 0 in horizontal_projections(window_image):
131
- # return True
132
 
133
- # padded_window = np.zeros((window_image.shape[0],1))
134
- # world_map = np.hstack((padded_window, np.hstack((window_image,padded_window)) ) )
135
- # path = np.array(astar(world_map, (int(world_map.shape[0]/2), 0), (int(world_map.shape[0]/2), world_map.shape[1])))
136
- # if len(path) > 0:
137
- # return True
138
 
139
- # return False
140
 
141
- # def get_road_block_regions(nmap):
142
- # road_blocks = []
143
- # needtobreak = False
144
 
145
- # for col in range(nmap.shape[1]):
146
- # start = col
147
- # end = col+20
148
- # if end > nmap.shape[1]-1:
149
- # end = nmap.shape[1]-1
150
- # needtobreak = True
151
-
152
- # if path_exists(nmap[:, start:end]) == False:
153
- # road_blocks.append(col)
154
-
155
- # if needtobreak == True:
156
- # break
157
 
158
- # return road_blocks
159
-
160
- # def group_the_road_blocks(road_blocks):
161
- # #group the road blocks
162
- # road_blocks_cluster_groups = []
163
- # road_blocks_cluster = []
164
- # size = len(road_blocks)
165
- # for index, value in enumerate(road_blocks):
166
- # road_blocks_cluster.append(value)
167
- # if index < size-1 and (road_blocks[index+1] - road_blocks[index]) > 1:
168
- # road_blocks_cluster_groups.append([road_blocks_cluster[0], road_blocks_cluster[len(road_blocks_cluster)-1]])
169
- # road_blocks_cluster = []
170
-
171
- # if index == size-1 and len(road_blocks_cluster) > 0:
172
- # road_blocks_cluster_groups.append([road_blocks_cluster[0], road_blocks_cluster[len(road_blocks_cluster)-1]])
173
- # road_blocks_cluster = []
174
-
175
- # return road_blocks_cluster_groups
176
-
177
- # def extract_line_from_image(image, lower_line, upper_line):
178
- # lower_boundary = np.min(lower_line[:, 0])
179
- # upper_boundary = np.min(upper_line[:, 0])
180
- # img_copy = np.copy(image)
181
- # r, c = img_copy.shape
182
- # for index in range(c-1):
183
- # img_copy[0:lower_line[index, 0], index] = 0
184
- # img_copy[upper_line[index, 0]:r, index] = 0
185
 
186
- # return img_copy[lower_boundary:upper_boundary, :]
187
 
188
- # def extract(image):
189
- # img = rgb2gray(image)
190
 
191
- # #img = rgb2gray(imread("Penwritten_2048x.jpeg"))
192
- # #img = rgb2gray(imread("test.jpg"))
193
- # #img = rgb2gray(imread(""))
194
 
195
 
196
 
197
 
198
- # sobel_image = sobel(img)
199
- # hpp = horizontal_projections(sobel_image)
200
 
201
 
202
- # warnings.filterwarnings("ignore")
203
- # #find the midway where we can make a threshold and extract the peaks regions
204
- # #divider parameter value is used to threshold the peak values from non peak values.
205
 
206
 
207
- # peaks = find_peak_regions(hpp)
208
 
209
- # peaks_index = np.array(peaks)[:,0].astype(int)
210
- # #print(peaks_index.shape)
211
- # segmented_img = np.copy(img)
212
- # r= segmented_img.shape
213
- # for ri in range(r[0]):
214
- # if ri in peaks_index:
215
- # segmented_img[ri, :] = 0
216
 
217
- # #group the peaks into walking windows
218
 
219
 
220
- # hpp_clusters = get_hpp_walking_regions(peaks_index)
221
- # #a star path planning algorithm
222
 
223
 
224
 
@@ -226,100 +226,100 @@ from sklearn.feature_extraction.text import TfidfVectorizer
226
 
227
 
228
 
229
- # #Scan the paths to see if there are any blockers.
230
 
231
 
232
 
233
 
234
- # binary_image = get_binary(img)
235
 
236
- # for cluster_of_interest in hpp_clusters:
237
- # nmap = binary_image[cluster_of_interest[0]:cluster_of_interest[len(cluster_of_interest)-1],:]
238
- # road_blocks = get_road_block_regions(nmap)
239
- # road_blocks_cluster_groups = group_the_road_blocks(road_blocks)
240
- # #create the doorways
241
- # for index, road_blocks in enumerate(road_blocks_cluster_groups):
242
- # window_image = nmap[:, road_blocks[0]: road_blocks[1]+10]
243
- # binary_image[cluster_of_interest[0]:cluster_of_interest[len(cluster_of_interest)-1],:][:, road_blocks[0]: road_blocks[1]+10][int(window_image.shape[0]/2),:] *= 0
244
 
245
- # #now that everything is cleaner, its time to segment all the lines using the A* algorithm
246
- # line_segments = []
247
- # #print(len(hpp_clusters))
248
- # #print(hpp_clusters)
249
- # for i, cluster_of_interest in enumerate(hpp_clusters):
250
- # nmap = binary_image[cluster_of_interest[0]:cluster_of_interest[len(cluster_of_interest)-1],:]
251
- # path = np.array(astar(nmap, (int(nmap.shape[0]/2), 0), (int(nmap.shape[0]/2),nmap.shape[1]-1)))
252
- # #print(path.shape)
253
- # if path.shape[0]!=0:
254
- # #break
255
- # offset_from_top = cluster_of_interest[0]
256
- # #print(offset_from_top)
257
- # path[:,0] += offset_from_top
258
- # #print(path)
259
- # line_segments.append(path)
260
- # #print(i)
261
 
262
- # cluster_of_interest = hpp_clusters[1]
263
- # offset_from_top = cluster_of_interest[0]
264
- # nmap = binary_image[cluster_of_interest[0]:cluster_of_interest[len(cluster_of_interest)-1],:]
265
- # #plt.figure(figsize=(20,20))
266
- # #plt.imshow(invert(nmap), cmap="gray")
267
 
268
- # path = np.array(astar(nmap, (int(nmap.shape[0]/2), 0), (int(nmap.shape[0]/2),nmap.shape[1]-1)))
269
- # #plt.plot(path[:,1], path[:,0])
270
 
271
- # offset_from_top = cluster_of_interest[0]
272
 
273
 
274
 
275
- # ## add an extra line to the line segments array which represents the last bottom row on the image
276
- # last_bottom_row = np.flip(np.column_stack(((np.ones((img.shape[1],))*img.shape[0]), np.arange(img.shape[1]))).astype(int), axis=0)
277
- # line_segments.append(last_bottom_row)
278
 
279
- # line_images = []
280
 
281
 
282
 
283
 
284
- # line_count = len(line_segments)
285
- # fig, ax = plt.subplots(figsize=(10,10), nrows=line_count-1)
286
- # output = []
287
 
288
 
289
- # for line_index in range(line_count-1):
290
- # line_image = extract_line_from_image(img, line_segments[line_index], line_segments[line_index+1])
291
- # line_images.append(line_image)
292
- # #print(line_image)
293
- # #cv2.imwrite('/Users/vatsalya/Desktop/demo.jpeg',line_image)
294
 
295
 
296
- # # im=Image.fromarray(line_image)
297
- # # im=im.convert("L")
298
- # # im.save("demo.jpeg")
299
- # # print("#### Image Saved #######")
300
- # new_p = Image.fromarray(line_image)
301
- # if new_p.mode != 'RGB':
302
- # new_p = new_p.convert('RGB')
303
- # imageio.imwrite('demo.jpeg',new_p)
304
 
305
 
306
 
307
- # image = Image.open("demo.jpeg").convert("RGB")
308
 
309
- # #print("Started Processing")
310
- # #image = line_image
311
- # pixel_values = processor(images=image, return_tensors="pt").pixel_values
312
 
313
- # generated_ids = model.generate(pixel_values)
314
- # generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
315
- # print(generated_text)
316
- # output.append(generated_text)
317
- # #ax[line_index].imshow(line_image, cmap="gray")
318
- # result=""
319
- # for o in output:
320
- # result=result+o
321
- # result=result+" "
322
- # return result
323
 
324
 
325
 
@@ -446,19 +446,7 @@ import requests
446
  import base64
447
  def extract_eval(image1,image2,image3,image4):
448
  print(image1)
449
- #ideal_text=extract(image1)
450
-
451
- image_64_encode = base64.b64encode(image1)
452
- print(image_64_encode)
453
- print(image_64_encode.decode('utf-8'))
454
- encode_string="data:image/png;base64,"+str(image_64_encode)
455
- response = requests.post("https://mscsasem3-recogniser.hf.space/run/predict", json={
456
- "data": [
457
- #"",
458
- encode_string
459
- ]}).json()
460
-
461
- ideal_text = response["data"]
462
  print(data)
463
  print("Extracting Ideal Text \n")
464
  print(ideal_text)
 
31
 
32
 
33
 
34
# Single source of truth for the checkpoint name so the processor and the
# model are always loaded from the same place.
TROCR_CHECKPOINT = 'microsoft/trocr-base-handwritten'

# Created once at import time and reused by every call to extract().
processor = TrOCRProcessor.from_pretrained(TROCR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(TROCR_CHECKPOINT)

# Switch matplotlib to the Agg backend before any figure is created.
# NOTE(review): presumably chosen because the app runs without a display - confirm.
plt.switch_backend('Agg')
37
def horizontal_projections(sobel_image):
    """Return the horizontal projection profile of ``sobel_image``.

    Args:
        sobel_image: array-like image; values are summed along axis 1.

    Returns:
        The result of ``np.sum(sobel_image, axis=1)``, i.e. one total per
        row for a 2-D input.
    """
    return np.sum(sobel_image, axis=1)
39
 
40
 
41
def find_peak_regions(hpp, divider=4):
    """Select the entries of a projection profile that fall below a threshold.

    The threshold is ``(max(hpp) - min(hpp)) / divider``. Despite the name,
    the entries kept are those *below* the threshold.

    Args:
        hpp: 1-D sequence of projection values.
        divider: divisor applied to the value range to obtain the threshold;
            a larger divider gives a lower threshold.

    Returns:
        A list of ``[index, value]`` pairs, in index order, for every entry
        strictly below the threshold. Empty input yields an empty list.
    """
    # np.max / np.min raise on empty input; there is nothing to select anyway.
    if np.size(hpp) == 0:
        return []

    threshold = (np.max(hpp) - np.min(hpp)) / divider
    return [[i, hppv] for i, hppv in enumerate(hpp) if hppv < threshold]
49
+
50
def heuristic(a, b):
    """Return the squared Euclidean distance between two 2-D points.

    Args:
        a: indexable pair ``(a0, a1)``.
        b: indexable pair ``(b0, b1)``.

    Returns:
        ``(b0 - a0) ** 2 + (b1 - a1) ** 2``. No square root is taken, so the
        result grows quadratically with the distance.
    """
    return (b[0] - a[0]) ** 2 + (b[1] - a[1]) ** 2
52
+
53
def get_hpp_walking_regions(peaks_index):
    """Group a sequence of indices into runs of consecutive values.

    A new run starts whenever the difference between an index and its
    predecessor is greater than 1.

    Args:
        peaks_index: sequence of row indices, in the order they should be
            scanned.

    Returns:
        A list of lists, each holding one run of indices. Empty input yields
        an empty list.
    """
    hpp_clusters = []
    cluster = []
    for value in peaks_index:
        # A gap larger than one row closes the current run.
        if cluster and value - cluster[-1] > 1:
            hpp_clusters.append(cluster)
            cluster = []
        cluster.append(value)

    # Flush the final run.
    if cluster:
        hpp_clusters.append(cluster)

    return hpp_clusters
69
 
70
def astar(array, start, goal):
    """Search a grid for an 8-connected path from ``start`` to ``goal``.

    Cells whose value equals 1 are treated as walls; every other cell is
    walkable. Both the step cost and the goal estimate come from
    ``heuristic``.

    Args:
        array: 2-D grid indexed as ``array[row][col]`` and exposing ``shape``.
        start: ``(row, col)`` tuple where the search begins.
        goal: ``(row, col)`` tuple to reach.

    Returns:
        A list of ``(row, col)`` tuples ordered from ``goal`` back towards
        ``start`` (``start`` itself is not included). An empty list is
        returned when no path exists, and also when ``start == goal``.
    """
    rows, cols = array.shape[0], array.shape[1]

    # Only in-bounds neighbours are ever queued, so a goal outside the grid
    # can never be reached; answer immediately instead of exhausting the
    # whole reachable area first.
    if not (0 <= goal[0] < rows and 0 <= goal[1] < cols):
        return []

    neighbors = [(0, 1), (0, -1), (1, 0), (-1, 0),
                 (1, 1), (1, -1), (-1, 1), (-1, -1)]
    close_set = set()
    came_from = {}
    gscore = {start: 0}
    fscore = {start: heuristic(start, goal)}
    oheap = []
    # Number of heap entries currently held per node. This answers "is the
    # node in the open heap?" in O(1) instead of rescanning the heap for
    # every neighbour.
    queued = {start: 1}

    heappush(oheap, (fscore[start], start))

    while oheap:
        current = heappop(oheap)[1]
        queued[current] -= 1

        if current == goal:
            # Walk the parent links back from the goal.
            data = []
            while current in came_from:
                data.append(current)
                current = came_from[current]
            return data

        close_set.add(current)
        for i, j in neighbors:
            neighbor = current[0] + i, current[1] + j

            # Skip cells outside the grid and wall cells.
            if not (0 <= neighbor[0] < rows and 0 <= neighbor[1] < cols):
                continue
            if array[neighbor[0]][neighbor[1]] == 1:
                continue

            tentative_g_score = gscore[current] + heuristic(current, neighbor)

            if neighbor in close_set and tentative_g_score >= gscore.get(neighbor, 0):
                continue

            if tentative_g_score < gscore.get(neighbor, 0) or not queued.get(neighbor, 0):
                came_from[neighbor] = current
                gscore[neighbor] = tentative_g_score
                fscore[neighbor] = tentative_g_score + heuristic(neighbor, goal)
                heappush(oheap, (fscore[neighbor], neighbor))
                queued[neighbor] = queued.get(neighbor, 0) + 1

    return []
117
+
118
def get_binary(img):
    """Binarise ``img`` with an Otsu threshold.

    Args:
        img: image array.

    Returns:
        ``img`` itself, unchanged, when its mean is exactly 0.0 or 1.0.
        Otherwise an integer array with 1 where ``img <= threshold`` and 0
        elsewhere, the threshold being ``threshold_otsu(img)``.
    """
    mean = np.mean(img)
    # NOTE(review): presumably this detects an image that is already uniform
    # or already binary, so Otsu is skipped - confirm.
    if mean == 0.0 or mean == 1.0:
        return img

    thresh = threshold_otsu(img)
    # Multiplying the boolean mask by 1 converts it to integers.
    return (img <= thresh) * 1
127
+
128
def path_exists(window_image):
    """Report whether a window can be crossed from its left to its right edge.

    Args:
        window_image: 2-D array in which cells equal to 1 block movement
            (see ``astar``).

    Returns:
        True when some row of the window sums to zero, or when ``astar``
        finds a path across the padded window; False otherwise.
    """
    # Cheap check first: a row whose projection is 0.
    # NOTE(review): assumes non-negative values, so such a row is empty - confirm.
    if 0 in horizontal_projections(window_image):
        return True

    # Pad one all-zero column on each side so the start and goal cells are
    # always walkable.
    padded_window = np.zeros((window_image.shape[0], 1))
    world_map = np.hstack((padded_window, window_image, padded_window))
    middle_row = int(world_map.shape[0] / 2)

    # The goal is the LAST column, shape[1] - 1. The previous goal column
    # (shape[1]) was outside the grid, so the search could never succeed.
    path = astar(world_map, (middle_row, 0), (middle_row, world_map.shape[1] - 1))
    return len(path) > 0
140
 
141
def get_road_block_regions(nmap, window=20):
    """Find the columns at which a sliding window of ``nmap`` cannot be crossed.

    For each column ``col`` the slice ``nmap[:, col:col + window]`` is tested
    with ``path_exists``. Scanning stops after the first window that reaches
    the right edge; that window is clamped to end at the last column index.

    Args:
        nmap: 2-D map of one region.
        window: width in columns of the sliding window (default 20).

    Returns:
        A list of the starting columns whose window has no path.
    """
    road_blocks = []
    last_col = nmap.shape[1] - 1

    for col in range(nmap.shape[1]):
        end = col + window
        reached_edge = end > last_col
        if reached_edge:
            end = last_col

        if not path_exists(nmap[:, col:end]):
            road_blocks.append(col)

        # Every later window would be a shorter copy of this one.
        if reached_edge:
            break

    return road_blocks
159
+
160
def group_the_road_blocks(road_blocks):
    """Collapse a sequence of columns into ``[first, last]`` runs.

    A new run starts whenever a column differs from its predecessor by more
    than 1.

    Args:
        road_blocks: sequence of column indices, in scan order.

    Returns:
        A list of two-element lists ``[first, last]``, one per run. Empty
        input yields an empty list.
    """
    road_blocks_cluster_groups = []
    for value in road_blocks:
        if road_blocks_cluster_groups and value - road_blocks_cluster_groups[-1][1] <= 1:
            # Still adjacent to the current run: extend its end.
            road_blocks_cluster_groups[-1][1] = value
        else:
            road_blocks_cluster_groups.append([value, value])

    return road_blocks_cluster_groups
176
+
177
def extract_line_from_image(image, lower_line, upper_line):
    """Cut the band between two separator lines out of ``image``.

    Args:
        image: 2-D image array; it is not modified.
        lower_line: integer array of ``(row, col)`` points for the separator
            above the band.
        upper_line: integer array of ``(row, col)`` points for the separator
            below the band.

    Returns:
        A copy of the rows from the smallest row of ``lower_line`` up to (not
        including) the largest row of ``upper_line``. Pixels above
        ``lower_line`` or at/below ``upper_line`` are set to 0 in every column
        that has a separator point.
    """
    lower_boundary = np.min(lower_line[:, 0])
    # The crop must reach the lowest point of the bottom separator. Cropping
    # at its highest point cut off part of the band and made the bottom mask
    # below a no-op.
    upper_boundary = np.max(upper_line[:, 0])
    img_copy = np.copy(image)

    # Use each point's own column. astar() returns points goal-first, so
    # indexing the separator by position mirrored it horizontally.
    for row, col in lower_line:
        img_copy[:row, col] = 0
    for row, col in upper_line:
        img_copy[row:, col] = 0

    return img_copy[lower_boundary:upper_boundary, :]
187
 
188
+ def extract(image):
189
+ img = rgb2gray(image)
190
 
191
+ #img = rgb2gray(imread("Penwritten_2048x.jpeg"))
192
+ #img = rgb2gray(imread("test.jpg"))
193
+ #img = rgb2gray(imread(""))
194
 
195
 
196
 
197
 
198
+ sobel_image = sobel(img)
199
+ hpp = horizontal_projections(sobel_image)
200
 
201
 
202
+ warnings.filterwarnings("ignore")
203
+ #find the midway where we can make a threshold and extract the peaks regions
204
+ #divider parameter value is used to threshold the peak values from non peak values.
205
 
206
 
207
+ peaks = find_peak_regions(hpp)
208
 
209
+ peaks_index = np.array(peaks)[:,0].astype(int)
210
+ #print(peaks_index.shape)
211
+ segmented_img = np.copy(img)
212
+ r= segmented_img.shape
213
+ for ri in range(r[0]):
214
+ if ri in peaks_index:
215
+ segmented_img[ri, :] = 0
216
 
217
+ #group the peaks into walking windows
218
 
219
 
220
+ hpp_clusters = get_hpp_walking_regions(peaks_index)
221
+ #a star path planning algorithm
222
 
223
 
224
 
 
226
 
227
 
228
 
229
+ #Scan the paths to see if there are any blockers.
230
 
231
 
232
 
233
 
234
+ binary_image = get_binary(img)
235
 
236
+ for cluster_of_interest in hpp_clusters:
237
+ nmap = binary_image[cluster_of_interest[0]:cluster_of_interest[len(cluster_of_interest)-1],:]
238
+ road_blocks = get_road_block_regions(nmap)
239
+ road_blocks_cluster_groups = group_the_road_blocks(road_blocks)
240
+ #create the doorways
241
+ for index, road_blocks in enumerate(road_blocks_cluster_groups):
242
+ window_image = nmap[:, road_blocks[0]: road_blocks[1]+10]
243
+ binary_image[cluster_of_interest[0]:cluster_of_interest[len(cluster_of_interest)-1],:][:, road_blocks[0]: road_blocks[1]+10][int(window_image.shape[0]/2),:] *= 0
244
 
245
+ #now that everything is cleaner, its time to segment all the lines using the A* algorithm
246
+ line_segments = []
247
+ #print(len(hpp_clusters))
248
+ #print(hpp_clusters)
249
+ for i, cluster_of_interest in enumerate(hpp_clusters):
250
+ nmap = binary_image[cluster_of_interest[0]:cluster_of_interest[len(cluster_of_interest)-1],:]
251
+ path = np.array(astar(nmap, (int(nmap.shape[0]/2), 0), (int(nmap.shape[0]/2),nmap.shape[1]-1)))
252
+ #print(path.shape)
253
+ if path.shape[0]!=0:
254
+ #break
255
+ offset_from_top = cluster_of_interest[0]
256
+ #print(offset_from_top)
257
+ path[:,0] += offset_from_top
258
+ #print(path)
259
+ line_segments.append(path)
260
+ #print(i)
261
 
262
+ cluster_of_interest = hpp_clusters[1]
263
+ offset_from_top = cluster_of_interest[0]
264
+ nmap = binary_image[cluster_of_interest[0]:cluster_of_interest[len(cluster_of_interest)-1],:]
265
+ #plt.figure(figsize=(20,20))
266
+ #plt.imshow(invert(nmap), cmap="gray")
267
 
268
+ path = np.array(astar(nmap, (int(nmap.shape[0]/2), 0), (int(nmap.shape[0]/2),nmap.shape[1]-1)))
269
+ #plt.plot(path[:,1], path[:,0])
270
 
271
+ offset_from_top = cluster_of_interest[0]
272
 
273
 
274
 
275
+ ## add an extra line to the line segments array which represents the last bottom row on the image
276
+ last_bottom_row = np.flip(np.column_stack(((np.ones((img.shape[1],))*img.shape[0]), np.arange(img.shape[1]))).astype(int), axis=0)
277
+ line_segments.append(last_bottom_row)
278
 
279
+ line_images = []
280
 
281
 
282
 
283
 
284
+ line_count = len(line_segments)
285
+ fig, ax = plt.subplots(figsize=(10,10), nrows=line_count-1)
286
+ output = []
287
 
288
 
289
+ for line_index in range(line_count-1):
290
+ line_image = extract_line_from_image(img, line_segments[line_index], line_segments[line_index+1])
291
+ line_images.append(line_image)
292
+ #print(line_image)
293
+ #cv2.imwrite('/Users/vatsalya/Desktop/demo.jpeg',line_image)
294
 
295
 
296
+ # im=Image.fromarray(line_image)
297
+ # im=im.convert("L")
298
+ # im.save("demo.jpeg")
299
+ # print("#### Image Saved #######")
300
+ new_p = Image.fromarray(line_image)
301
+ if new_p.mode != 'RGB':
302
+ new_p = new_p.convert('RGB')
303
+ imageio.imwrite('demo.jpeg',new_p)
304
 
305
 
306
 
307
+ image = Image.open("demo.jpeg").convert("RGB")
308
 
309
+ #print("Started Processing")
310
+ #image = line_image
311
+ pixel_values = processor(images=image, return_tensors="pt").pixel_values
312
 
313
+ generated_ids = model.generate(pixel_values)
314
+ generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
315
+ print(generated_text)
316
+ output.append(generated_text)
317
+ #ax[line_index].imshow(line_image, cmap="gray")
318
+ result=""
319
+ for o in output:
320
+ result=result+o
321
+ result=result+" "
322
+ return result
323
 
324
 
325
 
 
446
  import base64
447
  def extract_eval(image1,image2,image3,image4):
448
  print(image1)
449
+ ideal_text=extract(image1)
 
 
 
 
 
 
 
 
 
 
 
 
450
  print(data)
451
  print("Extracting Ideal Text \n")
452
  print(ideal_text)