phyloforfun committed on
Commit
e9f4039
1 Parent(s): 448a637

update api prices, openai dev day

Browse files
app.py CHANGED
@@ -1096,6 +1096,11 @@ def content_tab_settings():
1096
  st.session_state.config['leafmachine']['cropped_components']['save_cropped_annotations'] = st.multiselect("Components to crop",
1097
  ['ruler', 'barcode','label', 'colorcard','map','envelope','photo','attached_item','weights',
1098
  'leaf_whole', 'leaf_partial', 'leaflet', 'seed_fruit_one', 'seed_fruit_many', 'flower_one', 'flower_many', 'bud','specimen','roots','wood'],default=default_crops)
 
 
 
 
 
1099
  with col_cropped_2:
1100
  ba = os.path.join(st.session_state.dir_home,'demo', 'ba','ba2.png')
1101
  image = Image.open(ba)
 
1096
  st.session_state.config['leafmachine']['cropped_components']['save_cropped_annotations'] = st.multiselect("Components to crop",
1097
  ['ruler', 'barcode','label', 'colorcard','map','envelope','photo','attached_item','weights',
1098
  'leaf_whole', 'leaf_partial', 'leaflet', 'seed_fruit_one', 'seed_fruit_many', 'flower_one', 'flower_many', 'bud','specimen','roots','wood'],default=default_crops)
1099
+
1100
+ st.subheader('Create OCR Overlay Image')
1101
+ st.write('This will plot bounding boxes around all text that Google Vision was able to detect. If there are no boxes around text, then the OCR failed, so that missing text will not be seen by the LLM when it is creating the JSON object. The created image will be viewable in the VoucherVisionEditor.')
1102
+ st.session_state.config['leafmachine']['do_create_OCR_helper_image'] = st.checkbox("Create image showing an overlay of the OCR detections", st.session_state.config['leafmachine'].get('do_create_OCR_helper_image', False))
1103
+
1104
  with col_cropped_2:
1105
  ba = os.path.join(st.session_state.dir_home,'demo', 'ba','ba2.png')
1106
  image = Image.open(ba)
vouchervision/OCR_google_cloud_vision.py CHANGED
@@ -60,10 +60,14 @@ def detect_text(path, client):
60
  else:
61
  return '', None, None
62
 
63
- def overlay_boxes_on_image(path, bounds):
64
- image = Image.open(path)
65
- draw_boxes(image, bounds, "green")
66
- return image
 
 
 
 
67
 
68
 
69
 
 
60
  else:
61
  return '', None, None
62
 
63
+ def overlay_boxes_on_image(path, bounds,do_create_OCR_helper_image):
64
+ if do_create_OCR_helper_image:
65
+ image = Image.open(path)
66
+ draw_boxes(image, bounds, "green")
67
+ return image
68
+ else:
69
+ image = Image.open(path)
70
+ return image
71
 
72
 
73
 
vouchervision/VoucherVision_Config_Builder.py CHANGED
@@ -35,6 +35,7 @@ def build_VV_config():
35
  LLM_version_user = 'Azure GPT 4'
36
  prompt_version = 'Version 2' # from ["Version 1", "Version 1 No Domain Knowledge", "Version 2"]
37
  use_LeafMachine2_collage_images = False # Use LeafMachine2 collage images
 
38
 
39
  batch_size = 500
40
 
@@ -49,12 +50,12 @@ def build_VV_config():
49
  return assemble_config(dir_home, run_name, dir_images_local,dir_output,
50
  prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
51
  path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
52
- prompt_version, use_domain_knowledge=False)
53
 
54
  def assemble_config(dir_home, run_name, dir_images_local,dir_output,
55
  prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
56
  path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
57
- prompt_version, use_domain_knowledge=False):
58
 
59
 
60
  # Initialize the base structure
@@ -104,6 +105,7 @@ def assemble_config(dir_home, run_name, dir_images_local,dir_output,
104
 
105
  LLM_version = LLM_version_user
106
  use_RGB_label_images = use_LeafMachine2_collage_images # Use LeafMachine2 collage images
 
107
 
108
  cropped_components_section = {
109
  'do_save_cropped_annotations': True,
@@ -166,6 +168,7 @@ def assemble_config(dir_home, run_name, dir_images_local,dir_output,
166
  config_data['leafmachine']['project'] = project_section
167
  config_data['leafmachine']['LLM_version'] = LLM_version
168
  config_data['leafmachine']['use_RGB_label_images'] = use_RGB_label_images
 
169
  config_data['leafmachine']['cropped_components'] = cropped_components_section
170
  config_data['leafmachine']['modules'] = modules_section
171
  config_data['leafmachine']['data'] = data_section
@@ -187,6 +190,7 @@ def build_api_tests(api):
187
  suffix_removal = ''
188
  catalog_numerical_only = False
189
  batch_size = 500
 
190
 
191
 
192
  # ### Option 1: "GPT 4" of ["GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5", "PaLM 2"]
@@ -234,7 +238,7 @@ def build_api_tests(api):
234
  config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
235
  prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
236
  path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
237
- prompt_version)
238
 
239
  write_config_file(config_data, os.path.join(dir_home,'demo','demo_configs'),filename=filename)
240
 
@@ -258,6 +262,7 @@ def build_demo_tests(llm_version):
258
  suffix_removal = ''
259
  catalog_numerical_only = False
260
  batch_size = 500
 
261
 
262
 
263
  # ### Option 1: "GPT 4" of ["GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5", "PaLM 2"]
@@ -306,23 +311,23 @@ def build_demo_tests(llm_version):
306
  config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
307
  prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
308
  path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
309
- prompt_version, use_domain_knowledge=True)
310
  else:
311
  config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
312
  prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
313
  path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
314
- prompt_version)
315
  elif llm_version == 'palm':
316
  if prompt_version in ['Version 1 PaLM 2']:
317
  config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
318
  prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
319
  path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
320
- prompt_version, use_domain_knowledge=True)
321
  else:
322
  config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
323
  prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
324
  path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
325
- prompt_version)
326
 
327
 
328
  write_config_file(config_data, os.path.join(dir_home,'demo','demo_configs'),filename=filename)
 
35
  LLM_version_user = 'Azure GPT 4'
36
  prompt_version = 'Version 2' # from ["Version 1", "Version 1 No Domain Knowledge", "Version 2"]
37
  use_LeafMachine2_collage_images = False # Use LeafMachine2 collage images
38
+ do_create_OCR_helper_image = False
39
 
40
  batch_size = 500
41
 
 
50
  return assemble_config(dir_home, run_name, dir_images_local,dir_output,
51
  prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
52
  path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
53
+ prompt_version, do_create_OCR_helper_image, use_domain_knowledge=False)
54
 
55
  def assemble_config(dir_home, run_name, dir_images_local,dir_output,
56
  prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
57
  path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
58
+ prompt_version, do_create_OCR_helper_image_user, use_domain_knowledge=False):
59
 
60
 
61
  # Initialize the base structure
 
105
 
106
  LLM_version = LLM_version_user
107
  use_RGB_label_images = use_LeafMachine2_collage_images # Use LeafMachine2 collage images
108
+ do_create_OCR_helper_image = do_create_OCR_helper_image_user
109
 
110
  cropped_components_section = {
111
  'do_save_cropped_annotations': True,
 
168
  config_data['leafmachine']['project'] = project_section
169
  config_data['leafmachine']['LLM_version'] = LLM_version
170
  config_data['leafmachine']['use_RGB_label_images'] = use_RGB_label_images
171
+ config_data['leafmachine']['do_create_OCR_helper_image'] = do_create_OCR_helper_image
172
  config_data['leafmachine']['cropped_components'] = cropped_components_section
173
  config_data['leafmachine']['modules'] = modules_section
174
  config_data['leafmachine']['data'] = data_section
 
190
  suffix_removal = ''
191
  catalog_numerical_only = False
192
  batch_size = 500
193
+ do_create_OCR_helper_image = False
194
 
195
 
196
  # ### Option 1: "GPT 4" of ["GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5", "PaLM 2"]
 
238
  config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
239
  prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
240
  path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
241
+ prompt_version, do_create_OCR_helper_image)
242
 
243
  write_config_file(config_data, os.path.join(dir_home,'demo','demo_configs'),filename=filename)
244
 
 
262
  suffix_removal = ''
263
  catalog_numerical_only = False
264
  batch_size = 500
265
+ do_create_OCR_helper_image = False
266
 
267
 
268
  # ### Option 1: "GPT 4" of ["GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5", "PaLM 2"]
 
311
  config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
312
  prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
313
  path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
314
+ prompt_version, do_create_OCR_helper_image, use_domain_knowledge=True)
315
  else:
316
  config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
317
  prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
318
  path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
319
+ prompt_version, do_create_OCR_helper_image)
320
  elif llm_version == 'palm':
321
  if prompt_version in ['Version 1 PaLM 2']:
322
  config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
323
  prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
324
  path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
325
+ prompt_version, do_create_OCR_helper_image, use_domain_knowledge=True)
326
  else:
327
  config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
328
  prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
329
  path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
330
+ prompt_version, do_create_OCR_helper_image)
331
 
332
 
333
  write_config_file(config_data, os.path.join(dir_home,'demo','demo_configs'),filename=filename)
vouchervision/utils_VoucherVision.py CHANGED
@@ -604,7 +604,7 @@ class VoucherVision():
604
  self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Finished OCR')
605
  if len(self.OCR) > 0:
606
  self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Creating OCR Overlay Image')
607
- self.overlay_image = overlay_boxes_on_image(path_to_crop, self.bounds)
608
  self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Saved OCR Overlay Image')
609
 
610
  self.write_json_to_file(txt_file_path_OCR, {"OCR":self.OCR})
@@ -672,7 +672,7 @@ class VoucherVision():
672
  self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Finished OCR')
673
 
674
  self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Creating OCR Overlay Image')
675
- self.overlay_image = overlay_boxes_on_image(path_to_crop, self.bounds)
676
  self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Saved OCR Overlay Image')
677
 
678
  self.write_json_to_file(txt_file_path_OCR, {"OCR":self.OCR})
 
604
  self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Finished OCR')
605
  if len(self.OCR) > 0:
606
  self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Creating OCR Overlay Image')
607
+ self.overlay_image = overlay_boxes_on_image(path_to_crop, self.bounds, self.cfg['leafmachine']['do_create_OCR_helper_image'])
608
  self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Saved OCR Overlay Image')
609
 
610
  self.write_json_to_file(txt_file_path_OCR, {"OCR":self.OCR})
 
672
  self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Finished OCR')
673
 
674
  self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Creating OCR Overlay Image')
675
+ self.overlay_image = overlay_boxes_on_image(path_to_crop, self.bounds, self.cfg['leafmachine']['do_create_OCR_helper_image'])
676
  self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Saved OCR Overlay Image')
677
 
678
  self.write_json_to_file(txt_file_path_OCR, {"OCR":self.OCR})