Spaces:
Running
Running
phyloforfun
committed on
Commit
·
26c9c07
1
Parent(s):
aaf86cc
file upload gallery
Browse files
app.py
CHANGED
@@ -618,7 +618,7 @@ def show_available_APIs():
|
|
618 |
emoji_bad = ":x:"
|
619 |
|
620 |
table = {
|
621 |
-
'Google OCR API': emoji_good if st.session_state['has_key_google_OCR'] else emoji_bad,
|
622 |
'OpenAI API': emoji_good if st.session_state['has_key_openai'] else emoji_bad,
|
623 |
'Palm API': emoji_good if st.session_state['has_key_palm2'] else emoji_bad,
|
624 |
'OpenAI API (Azure)': emoji_good if st.session_state['has_key_azure'] else emoji_bad,
|
@@ -1252,6 +1252,7 @@ def content_header():
|
|
1252 |
with col_run_1:
|
1253 |
show_header_welcome()
|
1254 |
st.subheader('Run VoucherVision')
|
|
|
1255 |
if check_if_usable():
|
1256 |
if st.button("Start Processing", type='primary'):
|
1257 |
|
@@ -1259,8 +1260,13 @@ def content_header():
|
|
1259 |
write_config_file(st.session_state.config, st.session_state.dir_home, filename="VoucherVision.yaml")
|
1260 |
|
1261 |
path_custom_prompts = os.path.join(st.session_state.dir_home,'custom_prompts',st.session_state.config['leafmachine']['project']['prompt_version'])
|
|
|
|
|
|
|
|
|
|
|
1262 |
# Call the machine function.
|
1263 |
-
last_JSON_response, total_cost, st.session_state['zip_filepath'] = voucher_vision(None, st.session_state.dir_home, path_custom_prompts, None, progress_report,path_api_cost=os.path.join(st.session_state.dir_home,'api_cost','api_cost.yaml'))
|
1264 |
|
1265 |
if total_cost:
|
1266 |
st.success(f":money_with_wings: This run cost :heavy_dollar_sign:{total_cost:.4f}")
|
@@ -1327,10 +1333,6 @@ def content_tab_settings():
|
|
1327 |
|
1328 |
col_local_1, col_local_2 = st.columns([2,6])
|
1329 |
|
1330 |
-
# st.write("---")
|
1331 |
-
# st.header('Modules')
|
1332 |
-
# col_m1, col_m2 = st.columns(2)
|
1333 |
-
|
1334 |
st.write("---")
|
1335 |
st.header('Cropped Components')
|
1336 |
col_cropped_1, col_cropped_2 = st.columns([4,4])
|
@@ -1346,15 +1348,6 @@ def content_tab_settings():
|
|
1346 |
|
1347 |
### LLM Version
|
1348 |
with col_project_2:
|
1349 |
-
# LLM_VERSIONS_available =
|
1350 |
-
# st.session_state.config['leafmachine']['project']['dir_images_local'] = st.session_state['dir_uploaded_images'] #st.text_input("Input images directory", st.session_state.config['leafmachine']['project'].get('dir_images_local', ''))
|
1351 |
-
# # st.session_state.config['leafmachine']['project']['continue_run_from_partial_xlsx'] = st.text_input("Continue run from partially completed project XLSX", st.session_state.config['leafmachine']['project'].get('continue_run_from_partial_xlsx', ''), disabled=True)
|
1352 |
-
# st.subheader('LLM Version')
|
1353 |
-
# st.session_state.config['leafmachine']['LLM_version'] = st.selectbox("LLM version", LLM_VERSIONS,
|
1354 |
-
# index=LLM_VERSIONS.index(st.session_state.config['leafmachine'].get('LLM_version', 'Azure GPT 4')),
|
1355 |
-
# label_visibility='collapsed')
|
1356 |
-
# st.markdown("""***Note:*** GPT-4 is significantly more expensive than GPT-3.5 """)
|
1357 |
-
|
1358 |
# Determine the available versions based on the API keys present
|
1359 |
available_versions = []
|
1360 |
for api_name, versions in st.session_state['LLM_VERSIONS'].items():
|
|
|
618 |
emoji_bad = ":x:"
|
619 |
|
620 |
table = {
|
621 |
+
'Google Vision OCR API (required!)': emoji_good if st.session_state['has_key_google_OCR'] else emoji_bad,
|
622 |
'OpenAI API': emoji_good if st.session_state['has_key_openai'] else emoji_bad,
|
623 |
'Palm API': emoji_good if st.session_state['has_key_palm2'] else emoji_bad,
|
624 |
'OpenAI API (Azure)': emoji_good if st.session_state['has_key_azure'] else emoji_bad,
|
|
|
1252 |
with col_run_1:
|
1253 |
show_header_welcome()
|
1254 |
st.subheader('Run VoucherVision')
|
1255 |
+
N_STEPS = 10
|
1256 |
if check_if_usable():
|
1257 |
if st.button("Start Processing", type='primary'):
|
1258 |
|
|
|
1260 |
write_config_file(st.session_state.config, st.session_state.dir_home, filename="VoucherVision.yaml")
|
1261 |
|
1262 |
path_custom_prompts = os.path.join(st.session_state.dir_home,'custom_prompts',st.session_state.config['leafmachine']['project']['prompt_version'])
|
1263 |
+
|
1264 |
+
# Define number of overall steps
|
1265 |
+
progress_report.set_n_overall(N_STEPS)
|
1266 |
+
progress_report.update_overall(f"Starting VoucherVision...")
|
1267 |
+
|
1268 |
# Call the machine function.
|
1269 |
+
last_JSON_response, total_cost, st.session_state['zip_filepath'] = voucher_vision(None, st.session_state.dir_home, path_custom_prompts, None, progress_report,path_api_cost=os.path.join(st.session_state.dir_home,'api_cost','api_cost.yaml'), is_real_run=True)
|
1270 |
|
1271 |
if total_cost:
|
1272 |
st.success(f":money_with_wings: This run cost :heavy_dollar_sign:{total_cost:.4f}")
|
|
|
1333 |
|
1334 |
col_local_1, col_local_2 = st.columns([2,6])
|
1335 |
|
|
|
|
|
|
|
|
|
1336 |
st.write("---")
|
1337 |
st.header('Cropped Components')
|
1338 |
col_cropped_1, col_cropped_2 = st.columns([4,4])
|
|
|
1348 |
|
1349 |
### LLM Version
|
1350 |
with col_project_2:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1351 |
# Determine the available versions based on the API keys present
|
1352 |
available_versions = []
|
1353 |
for api_name, versions in st.session_state['LLM_VERSIONS'].items():
|
vouchervision/component_detector/component_detector.py
CHANGED
@@ -112,14 +112,18 @@ def detect_plant_components(cfg, logger, dir_home, Project, Dirs):
|
|
112 |
return Project
|
113 |
|
114 |
|
115 |
-
def detect_archival_components(cfg, logger, dir_home, Project, Dirs):
|
116 |
if not cfg['leafmachine']['use_RGB_label_images']:
|
117 |
logger.name = 'Skipping LeafMachine2 Label Detection'
|
118 |
-
logger.info(f"Full image will be used instead of the label collage")
|
|
|
|
|
119 |
else:
|
120 |
t1_start = perf_counter()
|
121 |
logger.name = 'Locating Archival Components'
|
122 |
logger.info(f"Detecting archival components in {len(os.listdir(Project.dir_images))} images")
|
|
|
|
|
123 |
|
124 |
|
125 |
try:
|
|
|
112 |
return Project
|
113 |
|
114 |
|
115 |
+
def detect_archival_components(cfg, logger, dir_home, Project, Dirs, is_real_run=False, progress_report=None):
|
116 |
if not cfg['leafmachine']['use_RGB_label_images']:
|
117 |
logger.name = 'Skipping LeafMachine2 Label Detection'
|
118 |
+
logger.info(f"Full image will be used instead of the label collage")
|
119 |
+
if is_real_run:
|
120 |
+
progress_report.update_overall(f"Skipping LeafMachine2 Label Detection")
|
121 |
else:
|
122 |
t1_start = perf_counter()
|
123 |
logger.name = 'Locating Archival Components'
|
124 |
logger.info(f"Detecting archival components in {len(os.listdir(Project.dir_images))} images")
|
125 |
+
if is_real_run:
|
126 |
+
progress_report.update_overall(f"Creating LeafMachine2 Label Collage")
|
127 |
|
128 |
|
129 |
try:
|
vouchervision/utils_VoucherVision.py
CHANGED
@@ -765,7 +765,9 @@ class VoucherVision():
|
|
765 |
self.add_data_to_excel_from_response(self.path_transcription, response, filename_without_extension, path_to_crop, txt_file_path, jpg_file_path_OCR_helper, nt_in, nt_out)
|
766 |
return response
|
767 |
|
768 |
-
def process_specimen_batch(self, progress_report):
|
|
|
|
|
769 |
try:
|
770 |
if self.has_key:
|
771 |
if self.model_name:
|
|
|
765 |
self.add_data_to_excel_from_response(self.path_transcription, response, filename_without_extension, path_to_crop, txt_file_path, jpg_file_path_OCR_helper, nt_in, nt_out)
|
766 |
return response
|
767 |
|
768 |
+
def process_specimen_batch(self, progress_report, is_real_run=False):
|
769 |
+
if is_real_run:
|
770 |
+
progress_report.update_overall(f"Transcribing Labels")
|
771 |
try:
|
772 |
if self.has_key:
|
773 |
if self.model_name:
|
vouchervision/vouchervision_main.py
CHANGED
@@ -16,7 +16,7 @@ from fetch_data import fetch_data
|
|
16 |
from utils_VoucherVision import VoucherVision, space_saver
|
17 |
|
18 |
|
19 |
-
def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progress_report, path_api_cost=None, test_ind = None):
|
20 |
# get_n_overall = progress_report.get_n_overall()
|
21 |
# progress_report.update_overall(f"Working on {test_ind+1} of {get_n_overall}")
|
22 |
|
@@ -43,6 +43,8 @@ def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progr
|
|
43 |
# cfg['leafmachine']['project']['run_name'] = run_name[dir_ind]
|
44 |
|
45 |
# Dir structure
|
|
|
|
|
46 |
print_main_start("Creating Directory Structure")
|
47 |
Dirs = Dir_Structure(cfg)
|
48 |
|
@@ -50,6 +52,8 @@ def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progr
|
|
50 |
logger = start_logging(Dirs, cfg)
|
51 |
|
52 |
# Check to see if required ML files are ready to use
|
|
|
|
|
53 |
ready_to_use = fetch_data(logger, dir_home, cfg_file_path)
|
54 |
assert ready_to_use, "Required ML files are not ready to use!\nThe download may have failed,\nor\nthe directory structure of LM2 has been altered"
|
55 |
|
@@ -62,15 +66,15 @@ def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progr
|
|
62 |
|
63 |
# Detect Archival Components
|
64 |
print_main_start("Locating Archival Components")
|
65 |
-
Project = detect_archival_components(cfg, logger, dir_home, Project, Dirs)
|
66 |
-
|
67 |
# Save cropped detections
|
68 |
crop_detections_from_images_VV(cfg, logger, dir_home, Project, Dirs)
|
69 |
|
70 |
# Process labels
|
71 |
Voucher_Vision = VoucherVision(cfg, logger, dir_home, path_custom_prompts, Project, Dirs)
|
72 |
n_images = len(Voucher_Vision.img_paths)
|
73 |
-
last_JSON_response, total_tokens_in, total_tokens_out = Voucher_Vision.process_specimen_batch(progress_report)
|
74 |
|
75 |
if path_api_cost:
|
76 |
cost_summary, data, total_cost = save_token_info_as_csv(Dirs, cfg['leafmachine']['LLM_version'], path_api_cost, total_tokens_in, total_tokens_out, n_images)
|
@@ -84,6 +88,9 @@ def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progr
|
|
84 |
logger.info(f"[Total elapsed time] {round((t_overall_s - t_overall)/60)} minutes")
|
85 |
space_saver(cfg, Dirs, logger)
|
86 |
|
|
|
|
|
|
|
87 |
for handler in logger.handlers[:]:
|
88 |
handler.close()
|
89 |
logger.removeHandler(handler)
|
|
|
16 |
from utils_VoucherVision import VoucherVision, space_saver
|
17 |
|
18 |
|
19 |
+
def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progress_report, path_api_cost=None, test_ind = None, is_real_run=False):
|
20 |
# get_n_overall = progress_report.get_n_overall()
|
21 |
# progress_report.update_overall(f"Working on {test_ind+1} of {get_n_overall}")
|
22 |
|
|
|
43 |
# cfg['leafmachine']['project']['run_name'] = run_name[dir_ind]
|
44 |
|
45 |
# Dir structure
|
46 |
+
if is_real_run:
|
47 |
+
progress_report.update_overall(f"Creating Output Directory Structure")
|
48 |
print_main_start("Creating Directory Structure")
|
49 |
Dirs = Dir_Structure(cfg)
|
50 |
|
|
|
52 |
logger = start_logging(Dirs, cfg)
|
53 |
|
54 |
# Check to see if required ML files are ready to use
|
55 |
+
if is_real_run:
|
56 |
+
progress_report.update_overall(f"Fetching LeafMachine2 Files")
|
57 |
ready_to_use = fetch_data(logger, dir_home, cfg_file_path)
|
58 |
assert ready_to_use, "Required ML files are not ready to use!\nThe download may have failed,\nor\nthe directory structure of LM2 has been altered"
|
59 |
|
|
|
66 |
|
67 |
# Detect Archival Components
|
68 |
print_main_start("Locating Archival Components")
|
69 |
+
Project = detect_archival_components(cfg, logger, dir_home, Project, Dirs, is_real_run, progress_report)
|
70 |
+
|
71 |
# Save cropped detections
|
72 |
crop_detections_from_images_VV(cfg, logger, dir_home, Project, Dirs)
|
73 |
|
74 |
# Process labels
|
75 |
Voucher_Vision = VoucherVision(cfg, logger, dir_home, path_custom_prompts, Project, Dirs)
|
76 |
n_images = len(Voucher_Vision.img_paths)
|
77 |
+
last_JSON_response, total_tokens_in, total_tokens_out = Voucher_Vision.process_specimen_batch(progress_report, is_real_run)
|
78 |
|
79 |
if path_api_cost:
|
80 |
cost_summary, data, total_cost = save_token_info_as_csv(Dirs, cfg['leafmachine']['LLM_version'], path_api_cost, total_tokens_in, total_tokens_out, n_images)
|
|
|
88 |
logger.info(f"[Total elapsed time] {round((t_overall_s - t_overall)/60)} minutes")
|
89 |
space_saver(cfg, Dirs, logger)
|
90 |
|
91 |
+
if is_real_run:
|
92 |
+
progress_report.update_overall(f"Run Complete! :sunglasses:")
|
93 |
+
|
94 |
for handler in logger.handlers[:]:
|
95 |
handler.close()
|
96 |
logger.removeHandler(handler)
|