File size: 7,147 Bytes
87c3140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
'''
VoucherVision - based on LeafMachine2 Processes
'''
import os, inspect, sys, logging, subprocess
from time import perf_counter
currentdir = os.path.dirname(os.path.dirname(inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.append(parentdir)
sys.path.append(currentdir)
from vouchervision.component_detector.component_detector import detect_plant_components, detect_archival_components
from general_utils import add_to_expense_report, save_token_info_as_csv, print_main_start, check_for_subdirs_VV, load_config_file, load_config_file_testing, report_config, save_config_file, subset_dir_images, crop_detections_from_images_VV
from directory_structure_VV import Dir_Structure
from data_project import Project_Info
from LM2_logger import start_logging
from fetch_data import fetch_data
from utils_VoucherVision import VoucherVision, space_saver


def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progress_report, path_api_cost=None, test_ind = None):
    # get_n_overall = progress_report.get_n_overall()
    # progress_report.update_overall(f"Working on {test_ind+1} of {get_n_overall}")

    t_overall = perf_counter()

    # Load config file
    report_config(dir_home, cfg_file_path, system='VoucherVision')

    if cfg_test is None:
        cfg = load_config_file(dir_home, cfg_file_path, system='VoucherVision')  # For VoucherVision
    else:
        cfg = cfg_test 
    # user_cfg = load_config_file(dir_home, cfg_file_path)
    # cfg = Config(user_cfg)

    # Check to see if there are subdirs
    # Yes --> use the names of the subsirs as run_name
    run_name, dirs_list, has_subdirs = check_for_subdirs_VV(cfg)
    print(f"run_name {run_name} dirs_list{dirs_list} has_subdirs{has_subdirs}")

    # for dir_ind, dir_in in enumerate(dirs_list):
    #     if has_subdirs:
    #         cfg['leafmachine']['project']['dir_images_local'] = dir_in
    #         cfg['leafmachine']['project']['run_name'] = run_name[dir_ind]

    # Dir structure
    print_main_start("Creating Directory Structure")
    Dirs = Dir_Structure(cfg)

    # logging.info("Hi")
    logger = start_logging(Dirs, cfg)

    # Check to see if required ML files are ready to use
    ready_to_use = fetch_data(logger, dir_home, cfg_file_path)
    assert ready_to_use, "Required ML files are not ready to use!\nThe download may have failed,\nor\nthe directory structure of LM2 has been altered"

    # Wrangle images and preprocess
    print_main_start("Gathering Images and Image Metadata")
    Project = Project_Info(cfg, logger, dir_home, Dirs) # Where file names are modified

    # Save config file
    save_config_file(cfg, logger, Dirs)

    # Detect Archival Components
    print_main_start("Locating Archival Components")
    Project = detect_archival_components(cfg, logger, dir_home, Project, Dirs)

    # Save cropped detections
    crop_detections_from_images_VV(cfg, logger, dir_home, Project, Dirs)

    # Process labels
    Voucher_Vision = VoucherVision(cfg, logger, dir_home, path_custom_prompts, Project, Dirs)
    n_images = len(Voucher_Vision.img_paths)
    last_JSON_response, total_tokens_in, total_tokens_out = Voucher_Vision.process_specimen_batch(progress_report)
    
    if path_api_cost:
        cost_summary, data, total_cost = save_token_info_as_csv(Dirs, cfg['leafmachine']['LLM_version'], path_api_cost, total_tokens_in, total_tokens_out, n_images)
        add_to_expense_report(dir_home, data)
        logger.info(cost_summary)
    else:
        total_cost = None #TODO add config tests to expense_report

    t_overall_s = perf_counter()
    logger.name = 'Run Complete! :)'
    logger.info(f"[Total elapsed time] {round((t_overall_s - t_overall)/60)} minutes")
    space_saver(cfg, Dirs, logger)

    for handler in logger.handlers[:]:
        handler.close()
        logger.removeHandler(handler)

    return last_JSON_response, total_cost

def voucher_vision_OCR_test(cfg_file_path, dir_home, cfg_test, path_to_crop):
    # get_n_overall = progress_report.get_n_overall()
    # progress_report.update_overall(f"Working on {test_ind+1} of {get_n_overall}")

    # Load config file
    report_config(dir_home, cfg_file_path, system='VoucherVision')

    if cfg_test is None:
        cfg = load_config_file(dir_home, cfg_file_path, system='VoucherVision')  # For VoucherVision
    else:
        cfg = cfg_test 
    # user_cfg = load_config_file(dir_home, cfg_file_path)
    # cfg = Config(user_cfg)

    # Check to see if there are subdirs
    # Yes --> use the names of the subsirs as run_name
    run_name, dirs_list, has_subdirs = check_for_subdirs_VV(cfg)
    print(f"run_name {run_name} dirs_list{dirs_list} has_subdirs{has_subdirs}")

    # for dir_ind, dir_in in enumerate(dirs_list):
    #     if has_subdirs:
    #         cfg['leafmachine']['project']['dir_images_local'] = dir_in
    #         cfg['leafmachine']['project']['run_name'] = run_name[dir_ind]

    # Dir structure
    print_main_start("Creating Directory Structure")
    Dirs = Dir_Structure(cfg)

    # logging.info("Hi")
    logger = start_logging(Dirs, cfg)

    # Check to see if required ML files are ready to use
    ready_to_use = fetch_data(logger, dir_home, cfg_file_path)
    assert ready_to_use, "Required ML files are not ready to use!\nThe download may have failed,\nor\nthe directory structure of LM2 has been altered"

    # Wrangle images and preprocess
    print_main_start("Gathering Images and Image Metadata")
    Project = Project_Info(cfg, logger, dir_home, Dirs) # Where file names are modified

    # Save config file
    save_config_file(cfg, logger, Dirs)

    # Detect Archival Components
    print_main_start("Locating Archival Components")
    Project = detect_archival_components(cfg, logger, dir_home, Project, Dirs)

    # Save cropped detections
    crop_detections_from_images_VV(cfg, logger, dir_home, Project, Dirs)

    # Process labels
    Voucher_Vision = VoucherVision(cfg, logger, dir_home, None, Project, Dirs)
    last_JSON_response = Voucher_Vision.process_specimen_batch_OCR_test(path_to_crop)

if __name__ == '__main__':    
    is_test = False

    # Set LeafMachine2 dir 
    dir_home = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))

    if is_test:
        cfg_file_path = os.path.join(dir_home, 'demo','demo.yaml') #'D:\Dropbox\LeafMachine2\LeafMachine2.yaml'
        # cfg_file_path = 'test_installation'

        cfg_testing = load_config_file_testing(dir_home, cfg_file_path)
        cfg_testing['leafmachine']['project']['dir_images_local'] = os.path.join(dir_home, cfg_testing['leafmachine']['project']['dir_images_local'][0], cfg_testing['leafmachine']['project']['dir_images_local'][1])
        cfg_testing['leafmachine']['project']['dir_output'] = os.path.join(dir_home, cfg_testing['leafmachine']['project']['dir_output'][0], cfg_testing['leafmachine']['project']['dir_output'][1])

        last_JSON_response = voucher_vision(cfg_file_path, dir_home, cfg_testing, None)
    else:
        cfg_file_path = None
        cfg_testing = None
        last_JSON_response = voucher_vision(cfg_file_path, dir_home, cfg_testing, None)