Spaces:
Running
Running
fix bugs
Browse files
- app.py +25 -20
- models/__pycache__/best_norm_ED.cpython-310.pyc +0 -0
- models/best_norm_ED.pth +1 -1
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
|
2 |
import streamlit as st
|
3 |
from PIL import Image
|
4 |
import os
|
@@ -10,7 +9,6 @@ from pdf2image import convert_from_bytes
|
|
10 |
#from st_btn_group import st_btn_group
|
11 |
#from streamlit_option_menu import option_menu
|
12 |
import docx
|
13 |
-
from docx.shared import Pt
|
14 |
from io import BytesIO
|
15 |
#import streamlit.components.v1 as components
|
16 |
import base64
|
@@ -118,12 +116,12 @@ col1, col2 = st.columns(2)
|
|
118 |
# return image, result
|
119 |
import time
|
120 |
|
121 |
-
max_page =
|
122 |
def recognize_page_image(image):
|
123 |
start = time.time()
|
124 |
result = [[0,"Sample 1"],[1,"Sample 2"]]
|
125 |
-
result = reader.readtext(np.array(image), paragraph=False)
|
126 |
-
result = get_paragraph(result)
|
127 |
end = time.time()
|
128 |
return result,(end-start)
|
129 |
|
@@ -164,7 +162,7 @@ def process_pdf(uploaded_file):
|
|
164 |
button_group.write(button_group_html,unsafe_allow_html=True)
|
165 |
#col1.write("</div>",unsafe_allow_html=True)
|
166 |
progress_bar.progress(0.99,text=f'{min(total_pages,max_page)} бет жүктелді')
|
167 |
-
|
168 |
def get_paragraph(raw_result, x_ths=1, y_ths=0.5, mode = 'ltr'):
|
169 |
# create basic attributes
|
170 |
box_group = []
|
@@ -188,18 +186,24 @@ def get_paragraph(raw_result, x_ths=1, y_ths=0.5, mode = 'ltr'):
|
|
188 |
else:
|
189 |
current_box_group = [box for box in box_group if box[7]==current_group]
|
190 |
mean_height = np.mean([box[5] for box in current_box_group])
|
191 |
-
min_gx = min([box[1] for box in current_box_group]) - x_ths*mean_height
|
192 |
-
max_gx = max([box[2] for box in current_box_group]) + x_ths*mean_height
|
193 |
-
min_gy = min([box[3] for box in current_box_group]) - y_ths*mean_height
|
194 |
-
max_gy = max([box[4] for box in current_box_group]) + y_ths*mean_height
|
195 |
add_box = False
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
203 |
# cannot add more box, go to next group
|
204 |
if add_box==False:
|
205 |
current_group += 1
|
@@ -233,6 +237,7 @@ def get_paragraph(raw_result, x_ths=1, y_ths=0.5, mode = 'ltr'):
|
|
233 |
|
234 |
return result
|
235 |
|
|
|
236 |
if uploaded_file is not None:
|
237 |
if uploaded_file.type == "application/pdf":
|
238 |
placeholder = col2.empty()
|
@@ -247,9 +252,9 @@ if uploaded_file is not None:
|
|
247 |
image = Image.open(uploaded_file)
|
248 |
#with open(os.path.join("tempDir",image_file))
|
249 |
col1.image(image)
|
250 |
-
result = reader.readtext(np.array(image), paragraph=
|
|
|
251 |
result_text = "\n\n".join([item[1] for item in result])
|
252 |
button_group_html = generateButtonGroup(result)
|
253 |
col2.write(button_group_html, unsafe_allow_html=True)
|
254 |
-
col2.markdown(result_text)
|
255 |
-
|
|
|
|
|
1 |
import streamlit as st
|
2 |
from PIL import Image
|
3 |
import os
|
|
|
9 |
#from st_btn_group import st_btn_group
|
10 |
#from streamlit_option_menu import option_menu
|
11 |
import docx
|
|
|
12 |
from io import BytesIO
|
13 |
#import streamlit.components.v1 as components
|
14 |
import base64
|
|
|
116 |
# return image, result
|
117 |
import time
|
118 |
|
119 |
+
max_page = 5
|
120 |
def recognize_page_image(image):
|
121 |
start = time.time()
|
122 |
result = [[0,"Sample 1"],[1,"Sample 2"]]
|
123 |
+
result = reader.readtext(np.array(image), batch_size=64, paragraph=False, y_ths=0, width_ths = 0)
|
124 |
+
result = get_paragraph(result, y_ths=0, x_ths = 0)
|
125 |
end = time.time()
|
126 |
return result,(end-start)
|
127 |
|
|
|
162 |
button_group.write(button_group_html,unsafe_allow_html=True)
|
163 |
#col1.write("</div>",unsafe_allow_html=True)
|
164 |
progress_bar.progress(0.99,text=f'{min(total_pages,max_page)} бет жүктелді')
|
165 |
+
|
166 |
def get_paragraph(raw_result, x_ths=1, y_ths=0.5, mode = 'ltr'):
|
167 |
# create basic attributes
|
168 |
box_group = []
|
|
|
186 |
else:
|
187 |
current_box_group = [box for box in box_group if box[7]==current_group]
|
188 |
mean_height = np.mean([box[5] for box in current_box_group])
|
189 |
+
# min_gx = min([box[1] for box in current_box_group]) - x_ths*mean_height
|
190 |
+
# max_gx = max([box[2] for box in current_box_group]) + x_ths*mean_height
|
191 |
+
# min_gy = min([box[3] for box in current_box_group]) - y_ths*mean_height
|
192 |
+
# max_gy = max([box[4] for box in current_box_group]) + y_ths*mean_height
|
193 |
add_box = False
|
194 |
+
|
195 |
+
for box in current_box_group:
|
196 |
+
min_gx = box[1] - x_ths*mean_height
|
197 |
+
max_gx = box[2] + x_ths*mean_height
|
198 |
+
min_gy = box[3] - y_ths*mean_height
|
199 |
+
max_gy = box[4] + y_ths*mean_height
|
200 |
+
for box in box_group0:
|
201 |
+
same_horizontal_level = (min_gx<=box[1]<=max_gx) or (min_gx<=box[2]<=max_gx)
|
202 |
+
same_vertical_level = (min_gy<=box[6]<=max_gy)
|
203 |
+
if same_horizontal_level and same_vertical_level:
|
204 |
+
box[7] = current_group
|
205 |
+
add_box = True
|
206 |
+
break
|
207 |
# cannot add more box, go to next group
|
208 |
if add_box==False:
|
209 |
current_group += 1
|
|
|
237 |
|
238 |
return result
|
239 |
|
240 |
+
|
241 |
if uploaded_file is not None:
|
242 |
if uploaded_file.type == "application/pdf":
|
243 |
placeholder = col2.empty()
|
|
|
252 |
image = Image.open(uploaded_file)
|
253 |
#with open(os.path.join("tempDir",image_file))
|
254 |
col1.image(image)
|
255 |
+
result = reader.readtext(np.array(image), batch_size=64, paragraph=False, y_ths=0, width_ths = 0)
|
256 |
+
result = get_paragraph(result, y_ths=0)
|
257 |
result_text = "\n\n".join([item[1] for item in result])
|
258 |
button_group_html = generateButtonGroup(result)
|
259 |
col2.write(button_group_html, unsafe_allow_html=True)
|
260 |
+
col2.markdown(result_text)
|
|
models/__pycache__/best_norm_ED.cpython-310.pyc
CHANGED
Binary files a/models/__pycache__/best_norm_ED.cpython-310.pyc and b/models/__pycache__/best_norm_ED.cpython-310.pyc differ
|
|
models/best_norm_ED.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15217067
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a0a93dd748a84d3998efccee420e3cabdf6b1693d3411374e871bcdb8c078169
|
3 |
size 15217067
|