Spaces:
Running
Running
phyloforfun
committed on
Commit
•
e9f4039
1
Parent(s):
448a637
update api prices, openai dev day
Browse files
app.py
CHANGED
@@ -1096,6 +1096,11 @@ def content_tab_settings():
|
|
1096 |
st.session_state.config['leafmachine']['cropped_components']['save_cropped_annotations'] = st.multiselect("Components to crop",
|
1097 |
['ruler', 'barcode','label', 'colorcard','map','envelope','photo','attached_item','weights',
|
1098 |
'leaf_whole', 'leaf_partial', 'leaflet', 'seed_fruit_one', 'seed_fruit_many', 'flower_one', 'flower_many', 'bud','specimen','roots','wood'],default=default_crops)
|
|
|
|
|
|
|
|
|
|
|
1099 |
with col_cropped_2:
|
1100 |
ba = os.path.join(st.session_state.dir_home,'demo', 'ba','ba2.png')
|
1101 |
image = Image.open(ba)
|
|
|
1096 |
st.session_state.config['leafmachine']['cropped_components']['save_cropped_annotations'] = st.multiselect("Components to crop",
|
1097 |
['ruler', 'barcode','label', 'colorcard','map','envelope','photo','attached_item','weights',
|
1098 |
'leaf_whole', 'leaf_partial', 'leaflet', 'seed_fruit_one', 'seed_fruit_many', 'flower_one', 'flower_many', 'bud','specimen','roots','wood'],default=default_crops)
|
1099 |
+
|
1100 |
+
st.subheader('Create OCR Overlay Image')
|
1101 |
+
st.write('This will plot bounding boxes around all text that Google Vision was able to detect. If there are no boxes around text, then the OCR failed, so that missing text will not be seen by the LLM when it is creating the JSON object. The created image will be viewable in the VoucherVisionEditor.')
|
1102 |
+
st.session_state.config['leafmachine']['do_create_OCR_helper_image'] = st.checkbox("Create image showing an overlay of the OCR detections", st.session_state.config['leafmachine'].get('do_create_OCR_helper_image', False))
|
1103 |
+
|
1104 |
with col_cropped_2:
|
1105 |
ba = os.path.join(st.session_state.dir_home,'demo', 'ba','ba2.png')
|
1106 |
image = Image.open(ba)
|
vouchervision/OCR_google_cloud_vision.py
CHANGED
@@ -60,10 +60,14 @@ def detect_text(path, client):
|
|
60 |
else:
|
61 |
return '', None, None
|
62 |
|
63 |
-
def overlay_boxes_on_image(path, bounds):
|
64 |
-
|
65 |
-
|
66 |
-
|
|
|
|
|
|
|
|
|
67 |
|
68 |
|
69 |
|
|
|
60 |
else:
|
61 |
return '', None, None
|
62 |
|
63 |
+
def overlay_boxes_on_image(path, bounds, do_create_OCR_helper_image):
    """Open the image at *path* and optionally overlay OCR bounding boxes.

    Args:
        path: Filesystem path of the image to open.
        bounds: OCR text bounding boxes, forwarded to ``draw_boxes``.
        do_create_OCR_helper_image: When True, draw green boxes around the
            detected text; when False, return the image unmodified.

    Returns:
        The opened PIL image, annotated in place when the flag is set.
    """
    # Open once; the original duplicated Image.open(path) in both branches.
    image = Image.open(path)
    if do_create_OCR_helper_image:
        # draw_boxes mutates `image` in place.
        draw_boxes(image, bounds, "green")
    return image
|
71 |
|
72 |
|
73 |
|
vouchervision/VoucherVision_Config_Builder.py
CHANGED
@@ -35,6 +35,7 @@ def build_VV_config():
|
|
35 |
LLM_version_user = 'Azure GPT 4'
|
36 |
prompt_version = 'Version 2' # from ["Version 1", "Version 1 No Domain Knowledge", "Version 2"]
|
37 |
use_LeafMachine2_collage_images = False # Use LeafMachine2 collage images
|
|
|
38 |
|
39 |
batch_size = 500
|
40 |
|
@@ -49,12 +50,12 @@ def build_VV_config():
|
|
49 |
return assemble_config(dir_home, run_name, dir_images_local,dir_output,
|
50 |
prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
|
51 |
path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
|
52 |
-
prompt_version, use_domain_knowledge=False)
|
53 |
|
54 |
def assemble_config(dir_home, run_name, dir_images_local,dir_output,
|
55 |
prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
|
56 |
path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
|
57 |
-
prompt_version, use_domain_knowledge=False):
|
58 |
|
59 |
|
60 |
# Initialize the base structure
|
@@ -104,6 +105,7 @@ def assemble_config(dir_home, run_name, dir_images_local,dir_output,
|
|
104 |
|
105 |
LLM_version = LLM_version_user
|
106 |
use_RGB_label_images = use_LeafMachine2_collage_images # Use LeafMachine2 collage images
|
|
|
107 |
|
108 |
cropped_components_section = {
|
109 |
'do_save_cropped_annotations': True,
|
@@ -166,6 +168,7 @@ def assemble_config(dir_home, run_name, dir_images_local,dir_output,
|
|
166 |
config_data['leafmachine']['project'] = project_section
|
167 |
config_data['leafmachine']['LLM_version'] = LLM_version
|
168 |
config_data['leafmachine']['use_RGB_label_images'] = use_RGB_label_images
|
|
|
169 |
config_data['leafmachine']['cropped_components'] = cropped_components_section
|
170 |
config_data['leafmachine']['modules'] = modules_section
|
171 |
config_data['leafmachine']['data'] = data_section
|
@@ -187,6 +190,7 @@ def build_api_tests(api):
|
|
187 |
suffix_removal = ''
|
188 |
catalog_numerical_only = False
|
189 |
batch_size = 500
|
|
|
190 |
|
191 |
|
192 |
# ### Option 1: "GPT 4" of ["GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5", "PaLM 2"]
|
@@ -234,7 +238,7 @@ def build_api_tests(api):
|
|
234 |
config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
|
235 |
prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
|
236 |
path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
|
237 |
-
prompt_version)
|
238 |
|
239 |
write_config_file(config_data, os.path.join(dir_home,'demo','demo_configs'),filename=filename)
|
240 |
|
@@ -258,6 +262,7 @@ def build_demo_tests(llm_version):
|
|
258 |
suffix_removal = ''
|
259 |
catalog_numerical_only = False
|
260 |
batch_size = 500
|
|
|
261 |
|
262 |
|
263 |
# ### Option 1: "GPT 4" of ["GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5", "PaLM 2"]
|
@@ -306,23 +311,23 @@ def build_demo_tests(llm_version):
|
|
306 |
config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
|
307 |
prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
|
308 |
path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
|
309 |
-
prompt_version, use_domain_knowledge=True)
|
310 |
else:
|
311 |
config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
|
312 |
prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
|
313 |
path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
|
314 |
-
prompt_version)
|
315 |
elif llm_version == 'palm':
|
316 |
if prompt_version in ['Version 1 PaLM 2']:
|
317 |
config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
|
318 |
prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
|
319 |
path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
|
320 |
-
prompt_version, use_domain_knowledge=True)
|
321 |
else:
|
322 |
config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
|
323 |
prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
|
324 |
path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
|
325 |
-
prompt_version)
|
326 |
|
327 |
|
328 |
write_config_file(config_data, os.path.join(dir_home,'demo','demo_configs'),filename=filename)
|
|
|
35 |
LLM_version_user = 'Azure GPT 4'
|
36 |
prompt_version = 'Version 2' # from ["Version 1", "Version 1 No Domain Knowledge", "Version 2"]
|
37 |
use_LeafMachine2_collage_images = False # Use LeafMachine2 collage images
|
38 |
+
do_create_OCR_helper_image = False
|
39 |
|
40 |
batch_size = 500
|
41 |
|
|
|
50 |
return assemble_config(dir_home, run_name, dir_images_local,dir_output,
|
51 |
prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
|
52 |
path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
|
53 |
+
prompt_version, do_create_OCR_helper_image, use_domain_knowledge=False)
|
54 |
|
55 |
def assemble_config(dir_home, run_name, dir_images_local,dir_output,
|
56 |
prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
|
57 |
path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
|
58 |
+
prompt_version, do_create_OCR_helper_image_user, use_domain_knowledge=False):
|
59 |
|
60 |
|
61 |
# Initialize the base structure
|
|
|
105 |
|
106 |
LLM_version = LLM_version_user
|
107 |
use_RGB_label_images = use_LeafMachine2_collage_images # Use LeafMachine2 collage images
|
108 |
+
do_create_OCR_helper_image = do_create_OCR_helper_image_user
|
109 |
|
110 |
cropped_components_section = {
|
111 |
'do_save_cropped_annotations': True,
|
|
|
168 |
config_data['leafmachine']['project'] = project_section
|
169 |
config_data['leafmachine']['LLM_version'] = LLM_version
|
170 |
config_data['leafmachine']['use_RGB_label_images'] = use_RGB_label_images
|
171 |
+
config_data['leafmachine']['do_create_OCR_helper_image'] = do_create_OCR_helper_image
|
172 |
config_data['leafmachine']['cropped_components'] = cropped_components_section
|
173 |
config_data['leafmachine']['modules'] = modules_section
|
174 |
config_data['leafmachine']['data'] = data_section
|
|
|
190 |
suffix_removal = ''
|
191 |
catalog_numerical_only = False
|
192 |
batch_size = 500
|
193 |
+
do_create_OCR_helper_image = False
|
194 |
|
195 |
|
196 |
# ### Option 1: "GPT 4" of ["GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5", "PaLM 2"]
|
|
|
238 |
config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
|
239 |
prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
|
240 |
path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
|
241 |
+
prompt_version, do_create_OCR_helper_image)
|
242 |
|
243 |
write_config_file(config_data, os.path.join(dir_home,'demo','demo_configs'),filename=filename)
|
244 |
|
|
|
262 |
suffix_removal = ''
|
263 |
catalog_numerical_only = False
|
264 |
batch_size = 500
|
265 |
+
do_create_OCR_helper_image = False
|
266 |
|
267 |
|
268 |
# ### Option 1: "GPT 4" of ["GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5", "PaLM 2"]
|
|
|
311 |
config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
|
312 |
prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
|
313 |
path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
|
314 |
+
prompt_version, do_create_OCR_helper_image, use_domain_knowledge=True)
|
315 |
else:
|
316 |
config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
|
317 |
prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
|
318 |
path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
|
319 |
+
prompt_version, do_create_OCR_helper_image)
|
320 |
elif llm_version == 'palm':
|
321 |
if prompt_version in ['Version 1 PaLM 2']:
|
322 |
config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
|
323 |
prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
|
324 |
path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
|
325 |
+
prompt_version, do_create_OCR_helper_image, use_domain_knowledge=True)
|
326 |
else:
|
327 |
config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
|
328 |
prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
|
329 |
path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
|
330 |
+
prompt_version, do_create_OCR_helper_image)
|
331 |
|
332 |
|
333 |
write_config_file(config_data, os.path.join(dir_home,'demo','demo_configs'),filename=filename)
|
vouchervision/utils_VoucherVision.py
CHANGED
@@ -604,7 +604,7 @@ class VoucherVision():
|
|
604 |
self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Finished OCR')
|
605 |
if len(self.OCR) > 0:
|
606 |
self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Creating OCR Overlay Image')
|
607 |
-
self.overlay_image = overlay_boxes_on_image(path_to_crop, self.bounds)
|
608 |
self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Saved OCR Overlay Image')
|
609 |
|
610 |
self.write_json_to_file(txt_file_path_OCR, {"OCR":self.OCR})
|
@@ -672,7 +672,7 @@ class VoucherVision():
|
|
672 |
self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Finished OCR')
|
673 |
|
674 |
self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Creating OCR Overlay Image')
|
675 |
-
self.overlay_image = overlay_boxes_on_image(path_to_crop, self.bounds)
|
676 |
self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Saved OCR Overlay Image')
|
677 |
|
678 |
self.write_json_to_file(txt_file_path_OCR, {"OCR":self.OCR})
|
|
|
604 |
self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Finished OCR')
|
605 |
if len(self.OCR) > 0:
|
606 |
self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Creating OCR Overlay Image')
|
607 |
+
self.overlay_image = overlay_boxes_on_image(path_to_crop, self.bounds, self.cfg['leafmachine']['do_create_OCR_helper_image'])
|
608 |
self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Saved OCR Overlay Image')
|
609 |
|
610 |
self.write_json_to_file(txt_file_path_OCR, {"OCR":self.OCR})
|
|
|
672 |
self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Finished OCR')
|
673 |
|
674 |
self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Creating OCR Overlay Image')
|
675 |
+
self.overlay_image = overlay_boxes_on_image(path_to_crop, self.bounds, self.cfg['leafmachine']['do_create_OCR_helper_image'])
|
676 |
self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Saved OCR Overlay Image')
|
677 |
|
678 |
self.write_json_to_file(txt_file_path_OCR, {"OCR":self.OCR})
|