File size: 7,504 Bytes
87c3140
 
 
aedd7d9
87c3140
 
 
 
 
 
e91ac58
87c3140
 
 
 
 
aedd7d9
87c3140
e91ac58
87c3140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26c9c07
 
87c3140
 
 
 
 
 
 
26c9c07
 
87c3140
 
 
 
 
 
 
 
 
 
 
 
26c9c07
e91ac58
87c3140
 
 
 
e91ac58
87c3140
e91ac58
 
 
87c3140
 
 
 
 
 
26c9c07
e91ac58
87c3140
e91ac58
93fd830
aedd7d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87c3140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
'''
VoucherVision - based on LeafMachine2 Processes
'''
import os, inspect, sys, logging, subprocess, shutil
from time import perf_counter
currentdir = os.path.dirname(os.path.dirname(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.append(parentdir)
sys.path.append(currentdir)
from vouchervision.component_detector.component_detector import detect_plant_components, detect_archival_components
from general_utils import add_to_expense_report, save_token_info_as_csv, print_main_start, check_for_subdirs_VV, load_config_file, load_config_file_testing, report_config, save_config_file, subset_dir_images, crop_detections_from_images_VV
from directory_structure_VV import Dir_Structure
from data_project import Project_Info
from LM2_logger import start_logging
from fetch_data import fetch_data
from utils_VoucherVision import VoucherVision, space_saver
from vouchervision.utils_hf import upload_to_drive

def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progress_report, json_report, path_api_cost=None, test_ind = None, is_hf = True, is_real_run=False):
    t_overall = perf_counter()

    # Load config file
    report_config(dir_home, cfg_file_path, system='VoucherVision')

    if cfg_test is None:
        cfg = load_config_file(dir_home, cfg_file_path, system='VoucherVision')  # For VoucherVision
    else:
        cfg = cfg_test 

    # Check to see if there are subdirs
    # Yes --> use the names of the subsirs as run_name
    run_name, dirs_list, has_subdirs = check_for_subdirs_VV(cfg)
    print(f"run_name {run_name} dirs_list{dirs_list} has_subdirs{has_subdirs}")

    # Dir structure
    if is_real_run:
        progress_report.update_overall(f"Creating Output Directory Structure")
    print_main_start("Creating Directory Structure")
    Dirs = Dir_Structure(cfg)

    # logging.info("Hi")
    logger = start_logging(Dirs, cfg)

    # Check to see if required ML files are ready to use
    if is_real_run:
        progress_report.update_overall(f"Fetching LeafMachine2 Files")
    ready_to_use = fetch_data(logger, dir_home, cfg_file_path)
    assert ready_to_use, "Required ML files are not ready to use!\nThe download may have failed,\nor\nthe directory structure of LM2 has been altered"

    # Wrangle images and preprocess
    print_main_start("Gathering Images and Image Metadata")
    Project = Project_Info(cfg, logger, dir_home, Dirs) # Where file names are modified

    # Save config file
    save_config_file(cfg, logger, Dirs)

    # Detect Archival Components
    print_main_start("Locating Archival Components")
    Project = detect_archival_components(cfg, logger, dir_home, Project, Dirs, is_real_run, progress_report)

    # Save cropped detections
    crop_detections_from_images_VV(cfg, logger, dir_home, Project, Dirs)

    # Process labels
    Voucher_Vision = VoucherVision(cfg, logger, dir_home, path_custom_prompts, Project, Dirs, is_hf)
    n_images = len(Voucher_Vision.img_paths)
    last_JSON_response, final_WFO_record, final_GEO_record, total_tokens_in, total_tokens_out = Voucher_Vision.process_specimen_batch(progress_report, json_report, is_real_run)

    total_cost = save_token_info_as_csv(Dirs, cfg['leafmachine']['LLM_version'], path_api_cost, total_tokens_in, total_tokens_out, n_images, dir_home, logger)

    t_overall_s = perf_counter()
    logger.name = 'Run Complete! :)'
    logger.info(f"[Total elapsed time] {round((t_overall_s - t_overall)/60)} minutes")
    space_saver(cfg, Dirs, logger)

    if is_real_run:
        progress_report.update_overall(f"Run Complete!")

    Voucher_Vision.close_logger_handlers()

    zip_filepath = None
    if is_hf:
        # Create Higging Face zip file
        dir_to_zip = os.path.join(Dirs.dir_home, Dirs.run_name)  
        zip_filename = Dirs.run_name  

        # Creating a zip file
        zip_filepath = make_zipfile(dir_to_zip, zip_filename)
        upload_to_drive(zip_filepath, zip_filename)

    return last_JSON_response, final_WFO_record, final_GEO_record, total_cost, Voucher_Vision.n_failed_OCR, Voucher_Vision.n_failed_LLM_calls, zip_filepath 

def make_zipfile(source_dir, output_filename):
    shutil.make_archive(output_filename, 'zip', source_dir)
    return output_filename + '.zip'

def voucher_vision_OCR_test(cfg_file_path, dir_home, cfg_test, path_to_crop):
    # get_n_overall = progress_report.get_n_overall()
    # progress_report.update_overall(f"Working on {test_ind+1} of {get_n_overall}")

    # Load config file
    report_config(dir_home, cfg_file_path, system='VoucherVision')

    if cfg_test is None:
        cfg = load_config_file(dir_home, cfg_file_path, system='VoucherVision')  # For VoucherVision
    else:
        cfg = cfg_test 
    # user_cfg = load_config_file(dir_home, cfg_file_path)
    # cfg = Config(user_cfg)

    # Check to see if there are subdirs
    # Yes --> use the names of the subsirs as run_name
    run_name, dirs_list, has_subdirs = check_for_subdirs_VV(cfg)
    print(f"run_name {run_name} dirs_list{dirs_list} has_subdirs{has_subdirs}")

    # for dir_ind, dir_in in enumerate(dirs_list):
    #     if has_subdirs:
    #         cfg['leafmachine']['project']['dir_images_local'] = dir_in
    #         cfg['leafmachine']['project']['run_name'] = run_name[dir_ind]

    # Dir structure
    print_main_start("Creating Directory Structure")
    Dirs = Dir_Structure(cfg)

    # logging.info("Hi")
    logger = start_logging(Dirs, cfg)

    # Check to see if required ML files are ready to use
    ready_to_use = fetch_data(logger, dir_home, cfg_file_path)
    assert ready_to_use, "Required ML files are not ready to use!\nThe download may have failed,\nor\nthe directory structure of LM2 has been altered"

    # Wrangle images and preprocess
    print_main_start("Gathering Images and Image Metadata")
    Project = Project_Info(cfg, logger, dir_home, Dirs) # Where file names are modified

    # Save config file
    save_config_file(cfg, logger, Dirs)

    # Detect Archival Components
    print_main_start("Locating Archival Components")
    Project = detect_archival_components(cfg, logger, dir_home, Project, Dirs)

    # Save cropped detections
    crop_detections_from_images_VV(cfg, logger, dir_home, Project, Dirs)

    # Process labels
    Voucher_Vision = VoucherVision(cfg, logger, dir_home, None, Project, Dirs)
    last_JSON_response = Voucher_Vision.process_specimen_batch_OCR_test(path_to_crop)

if __name__ == '__main__':    
    is_test = False

    # Set LeafMachine2 dir 
    dir_home = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))

    if is_test:
        cfg_file_path = os.path.join(dir_home, 'demo','demo.yaml') #'D:\Dropbox\LeafMachine2\LeafMachine2.yaml'
        # cfg_file_path = 'test_installation'

        cfg_testing = load_config_file_testing(dir_home, cfg_file_path)
        cfg_testing['leafmachine']['project']['dir_images_local'] = os.path.join(dir_home, cfg_testing['leafmachine']['project']['dir_images_local'][0], cfg_testing['leafmachine']['project']['dir_images_local'][1])
        cfg_testing['leafmachine']['project']['dir_output'] = os.path.join(dir_home, cfg_testing['leafmachine']['project']['dir_output'][0], cfg_testing['leafmachine']['project']['dir_output'][1])

        last_JSON_response = voucher_vision(cfg_file_path, dir_home, cfg_testing, None)
    else:
        cfg_file_path = None
        cfg_testing = None
        last_JSON_response = voucher_vision(cfg_file_path, dir_home, cfg_testing, None)