phyloforfun committed on
Commit
524a99c
1 Parent(s): 0560c52

Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing
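
As a hedged illustration of the "consistent JSON parsing" item above (a common pattern, not necessarily this commit's exact implementation), replies from any of the supported LLMs are typically reduced to one dictionary shape like so:

import json, re

def parse_llm_json(raw_text):
    """Illustrative only: extract the first {...} block from an LLM reply and
    parse it, returning None if no valid JSON object can be recovered."""
    match = re.search(r'\{.*\}', raw_text, re.DOTALL)
    if match is None:
        return None
    try:
        return json.loads(match.group(0))
    except json.JSONDecodeError:
        return None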

app.py CHANGED
@@ -2,7 +2,6 @@ import streamlit as st
 import yaml, os, json, random, time, re, torch, random, warnings, shutil, sys
 import seaborn as sns
 import plotly.graph_objs as go
- from itertools import chain
 from PIL import Image
 import pandas as pd
 from io import BytesIO
@@ -15,30 +14,190 @@ from vouchervision.vouchervision_main import voucher_vision
 from vouchervision.general_utils import test_GPU, get_cfg_from_full_path, summarize_expense_report, validate_dir
 from vouchervision.model_maps import ModelMaps
 from vouchervision.API_validation import APIvalidation
- from vouchervision.utils_hf import setup_streamlit_config, save_uploaded_file, check_prompt_yaml_filename, save_uploaded_local

 #################################################################################################################################################
 # Initializations ###############################################################################################################################
 #################################################################################################################################################
- st.set_page_config(layout="wide", page_icon='img/icon.ico', page_title='VoucherVision')

 # Parse the 'is_hf' argument and set it in session state
 if 'is_hf' not in st.session_state:
- st.session_state['is_hf'] = True

- ########################################################################################################
- ### ADDED FOR HUGGING FACE ####
- ########################################################################################################
- print(f"is_hf {st.session_state['is_hf']}")
 # Default YAML file path
 if 'config' not in st.session_state:
 st.session_state.config, st.session_state.dir_home = build_VV_config(loaded_cfg=None)
 setup_streamlit_config(st.session_state.dir_home)

 if 'uploader_idk' not in st.session_state:
 st.session_state['uploader_idk'] = 1
 if 'input_list_small' not in st.session_state:
@@ -60,11 +219,12 @@ if 'dir_uploaded_images_small' not in st.session_state:
 st.session_state['dir_uploaded_images_small'] = os.path.join(st.session_state.dir_home,'uploads_small')
 validate_dir(os.path.join(st.session_state.dir_home,'uploads_small'))

- MAX_GALLERY_IMAGES = 20
- GALLERY_IMAGE_SIZE = 96

 def content_input_images(col_left, col_right):
 st.write('---')
 # col1, col2 = st.columns([2,8])
@@ -83,7 +243,7 @@ def content_input_images(col_left, col_right):
 if st.session_state.is_hf:
 st.session_state['dir_uploaded_images'] = os.path.join(st.session_state.dir_home,'uploads')
 st.session_state['dir_uploaded_images_small'] = os.path.join(st.session_state.dir_home,'uploads_small')
- uploaded_files = st.file_uploader("Upload Images", type=['jpg', 'jpeg'], accept_multiple_files=True, key=st.session_state['uploader_idk'])
 st.button("Use Test Image",help="This will clear any uploaded images and load the 1 provided test image.",on_click=use_test_image)

 with col_right:
@@ -92,27 +252,37 @@ def content_input_images(col_left, col_right):
 # Clear input image gallery and input list
 clear_image_gallery()

- # Process the new images
 for uploaded_file in uploaded_files:
- file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
- st.session_state['input_list'].append(file_path)
- img = Image.open(file_path)
- img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
- file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
- st.session_state['input_list_small'].append(file_path_small)
- print(uploaded_file.name)
- # Set the local images to the uploaded images
- st.session_state.config['leafmachine']['project']['dir_images_local'] = st.session_state['dir_uploaded_images']
- n_images = len([f for f in os.listdir(st.session_state.config['leafmachine']['project']['dir_images_local']) if os.path.isfile(os.path.join(st.session_state.config['leafmachine']['project']['dir_images_local'], f))])
- st.session_state['processing_add_on'] = n_images
- uploaded_files = None
- st.session_state['uploader_idk'] += 1
- st.info(f"Processing **{n_images}** images from {st.session_state.config['leafmachine']['project']['dir_images_local']}")

 if st.session_state['input_list_small']:
 if len(st.session_state['input_list_small']) > MAX_GALLERY_IMAGES:
@@ -150,7 +320,6 @@ def content_input_images(col_left, col_right):
 st.session_state['dir_images_local_TEMP'] = st.session_state.config['leafmachine']['project']['dir_images_local']
 print("rerun")
 st.rerun()

 def list_jpg_files(directory_path):
 jpg_count = 0
@@ -243,39 +412,14 @@ def use_test_image():
 st.session_state['input_list_small'].append(file_path_small)

- def create_download_button_yaml(file_path, selected_yaml_file, key_val):
- file_label = f"Download {selected_yaml_file}"
- with open(file_path, 'rb') as f:
- st.download_button(
- label=file_label,
- data=f,
- file_name=os.path.basename(file_path),
- mime='application/x-yaml',use_container_width=True,key=key_val,
- )

- def upload_local_prompt_to_server(dir_prompt):
- uploaded_file = st.file_uploader("Upload a custom prompt file", type=['yaml'])
- if uploaded_file is not None:
- # Check the file extension
- file_name = uploaded_file.name
- if file_name.endswith('.yaml'):
- file_path = os.path.join(dir_prompt, file_name)
- # Save the file
- with open(file_path, 'wb') as f:
- f.write(uploaded_file.getbuffer())
- st.success(f"Saved file {file_name} in {dir_prompt}")
- else:
- st.error("Please upload a .yaml file that you previously created using this Prompt Builder tool.")

 def refresh():
 st.session_state['uploader_idk'] += 1
 st.write('')

 # def display_image_gallery():
 # # Initialize the container
 # con_image = st.empty()
@@ -516,10 +660,7 @@ class JSONReport:

- def does_private_file_exist():
- dir_home = os.path.dirname(__file__)
- path_cfg_private = os.path.join(dir_home, 'PRIVATE_DATA.yaml')
- return os.path.exists(path_cfg_private)

@@ -971,534 +1112,14 @@ def save_changes_to_API_keys(cfg_private,openai_api_key,azure_openai_api_version
 # st.session_state.private_file = does_private_file_exist()

 # Function to load a YAML file and update session_state
- def load_prompt_yaml(filename):
- st.session_state['user_clicked_load_prompt_yaml'] = filename
- with open(filename, 'r') as file:
- st.session_state['prompt_info'] = yaml.safe_load(file)
- st.session_state['prompt_author'] = st.session_state['prompt_info'].get('prompt_author', st.session_state['default_prompt_author'])
- st.session_state['prompt_author_institution'] = st.session_state['prompt_info'].get('prompt_author_institution', st.session_state['default_prompt_author_institution'])
- st.session_state['prompt_name'] = st.session_state['prompt_info'].get('prompt_name', st.session_state['default_prompt_name'])
- st.session_state['prompt_version'] = st.session_state['prompt_info'].get('prompt_version', st.session_state['default_prompt_version'])
- st.session_state['prompt_description'] = st.session_state['prompt_info'].get('prompt_description', st.session_state['default_prompt_description'])
- st.session_state['instructions'] = st.session_state['prompt_info'].get('instructions', st.session_state['default_instructions'])
- st.session_state['json_formatting_instructions'] = st.session_state['prompt_info'].get('json_formatting_instructions', st.session_state['default_json_formatting_instructions'])
- st.session_state['rules'] = st.session_state['prompt_info'].get('rules', {})
- st.session_state['mapping'] = st.session_state['prompt_info'].get('mapping', {})
- st.session_state['LLM'] = st.session_state['prompt_info'].get('LLM', 'General Purpose')
- # Placeholder:
- st.session_state['assigned_columns'] = list(chain.from_iterable(st.session_state['mapping'].values()))
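
As context for the `assigned_columns` line above, a minimal sketch (with a hypothetical `mapping`) of how `chain.from_iterable` flattens the category-to-column mapping into one list:

from itertools import chain

# Hypothetical 'mapping' block, using categories defined later in this file
mapping = {
    'TAXONOMY': ['scientificName', 'genus'],
    'GEOGRAPHY': ['country', 'stateProvince'],
}
assigned_columns = list(chain.from_iterable(mapping.values()))
# -> ['scientificName', 'genus', 'country', 'stateProvince']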

 ### Updated to match HF version
 # def save_prompt_yaml(filename):
- def save_prompt_yaml(filename, col):
- yaml_content = {
- 'prompt_author': st.session_state['prompt_author'],
- 'prompt_author_institution': st.session_state['prompt_author_institution'],
- 'prompt_name': st.session_state['prompt_name'],
- 'prompt_version': st.session_state['prompt_version'],
- 'prompt_description': st.session_state['prompt_description'],
- 'LLM': st.session_state['LLM'],
- 'instructions': st.session_state['instructions'],
- 'json_formatting_instructions': st.session_state['json_formatting_instructions'],
- 'rules': st.session_state['rules'],
- 'mapping': st.session_state['mapping'],
- }
- dir_prompt = os.path.join(st.session_state.dir_home, 'custom_prompts')
- filepath = os.path.join(dir_prompt, f"{filename}.yaml")
- with open(filepath, 'w') as file:
- yaml.safe_dump(dict(yaml_content), file, sort_keys=False)
- st.success(f"Prompt saved as '{filename}.yaml'.")
- with col: # added
- create_download_button_yaml(filepath, filename,key_val=2456237465) # added
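
For reference, a runnable sketch of the file that `save_prompt_yaml` produces; the keys mirror `yaml_content` above, while the values here are hypothetical defaults:

import yaml

# Hypothetical minimal prompt; the key set matches yaml_content above
prompt = {
    'prompt_author': 'unknown',
    'prompt_author_institution': 'unknown',
    'prompt_name': 'custom_prompt',
    'prompt_version': 'v-1-0',
    'prompt_description': 'unknown',
    'LLM': 'General Purpose',
    'instructions': '1. Refactor the unstructured OCR text into a dictionary...',
    'json_formatting_instructions': 'This section provides rules for formatting each JSON value organized by the JSON key.',
    'rules': {'catalogNumber': 'REPLACE WITH DESCRIPTION'},
    'mapping': {'TAXONOMY': ['scientificName'], 'GEOGRAPHY': ['country']},
}
print(yaml.safe_dump(prompt, sort_keys=False))  # same call the app uses to write the .yaml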

- def check_unique_mapping_assignments():
- print(st.session_state['assigned_columns'])
- if len(st.session_state['assigned_columns']) != len(set(st.session_state['assigned_columns'])):
- st.error("Each column name must be assigned to only one category.")
- return False
- elif not st.session_state['assigned_columns']:
- st.error("No columns have been mapped.")
- return False
- elif len(st.session_state['assigned_columns']) != len(st.session_state['rules'].keys()):
- incomplete = [item for item in list(st.session_state['rules'].keys()) if item not in st.session_state['assigned_columns']]
- st.warning(f"These columns have been mapped: {st.session_state['assigned_columns']}")
- st.error(f"However, these columns must be mapped before the prompt is complete: {incomplete}")
- return False
- else:
- st.success("Mapping confirmed.")
- return True

- def check_prompt_yaml_filename(fname):
- # Check if the filename only contains letters, numbers, underscores, and dashes
- pattern = r'^[\w-]+$'
- # The \w matches any alphanumeric character and is equivalent to the character class [a-zA-Z0-9_].
- # The hyphen - is literally matched.
- if re.match(pattern, fname):
- return True
- else:
- return False
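
A quick illustration of the `^[\w-]+$` check above, with hypothetical filenames:

import re

def is_valid_prompt_filename(fname):  # same pattern as check_prompt_yaml_filename
    return bool(re.match(r'^[\w-]+$', fname))

print(is_valid_prompt_filename('SLTPvA_custom-v1'))  # True
print(is_valid_prompt_filename('my prompt.yaml'))    # False: space and '.' are rejected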

- def btn_load_prompt(selected_yaml_file, dir_prompt):
- if selected_yaml_file:
- yaml_file_path = os.path.join(dir_prompt, selected_yaml_file)
- load_prompt_yaml(yaml_file_path)
- elif not selected_yaml_file:
- # Directly assigning default values since no file is selected
- st.session_state['prompt_info'] = {}
- st.session_state['prompt_author'] = st.session_state['default_prompt_author']
- st.session_state['prompt_author_institution'] = st.session_state['default_prompt_author_institution']
- st.session_state['prompt_name'] = st.session_state['prompt_name']
- st.session_state['prompt_version'] = st.session_state['prompt_version']
- st.session_state['prompt_description'] = st.session_state['default_prompt_description']
- st.session_state['instructions'] = st.session_state['default_instructions']
- st.session_state['json_formatting_instructions'] = st.session_state['default_json_formatting_instructions']
- st.session_state['rules'] = {}
- st.session_state['LLM'] = 'General Purpose'
- st.session_state['assigned_columns'] = []
- st.session_state['prompt_info'] = {
- 'prompt_author': st.session_state['prompt_author'],
- 'prompt_author_institution': st.session_state['prompt_author_institution'],
- 'prompt_name': st.session_state['prompt_name'],
- 'prompt_version': st.session_state['prompt_version'],
- 'prompt_description': st.session_state['prompt_description'],
- 'instructions': st.session_state['instructions'],
- 'json_formatting_instructions': st.session_state['json_formatting_instructions'],
- 'rules': st.session_state['rules'],
- 'mapping': st.session_state['mapping'],
- 'LLM': st.session_state['LLM']
- }

- def build_LLM_prompt_config():
- col_main1, col_main2 = st.columns([10,2])
- with col_main1:
- st.session_state.logo_path = os.path.join(st.session_state.dir_home, 'img','logo.png')
- st.session_state.logo = Image.open(st.session_state.logo_path)
- st.image(st.session_state.logo, width=250)
- with col_main2:
- if st.button('Exit',key='exist button 2'):
- st.session_state.proceed_to_build_llm_prompt = False
- st.session_state.proceed_to_main = True
- st.rerun()

- st.session_state['assigned_columns'] = []
- st.session_state['default_prompt_author'] = 'unknown'
- st.session_state['default_prompt_author_institution'] = 'unknown'
- st.session_state['default_prompt_name'] = 'custom_prompt'
- st.session_state['default_prompt_version'] = 'v-1-0'
- st.session_state['default_prompt_author_institution'] = 'unknown'
- st.session_state['default_prompt_description'] = 'unknown'
- st.session_state['default_LLM'] = 'General Purpose'
- st.session_state['default_instructions'] = """1. Refactor the unstructured OCR text into a dictionary based on the JSON structure outlined below.
- 2. Map the unstructured OCR text to the appropriate JSON key and populate the field given the user-defined rules.
- 3. JSON key values are permitted to remain empty strings if the corresponding information is not found in the unstructured OCR text.
- 4. Duplicate dictionary fields are not allowed.
- 5. Ensure all JSON keys are in camel case.
- 6. Ensure new JSON field values follow sentence case capitalization.
- 7. Ensure all key-value pairs in the JSON dictionary strictly adhere to the format and data types specified in the template.
- 8. Ensure output JSON string is valid JSON format. It should not have trailing commas or unquoted keys.
- 9. Only return a JSON dictionary represented as a string. You should not explain your answer."""
- st.session_state['default_json_formatting_instructions'] = """This section provides rules for formatting each JSON value organized by the JSON key."""

- # Start building the Streamlit app
- col_prompt_main_left, ___, col_prompt_main_right = st.columns([6,1,3])

- with col_prompt_main_left:
- st.title("Custom LLM Prompt Builder")
- st.subheader('About')
- st.write("This form allows you to craft a prompt for your specific task. You can also edit the JSON yaml files directly, but please try loading the prompt back into this form to ensure that the formatting is correct. If this form cannot load your manually edited JSON yaml file, then it will not work in VoucherVision.")
- st.subheader(':rainbow[How it Works]')
- st.write("1. Edit this page until you are happy with your instructions. We recommend looking at the basic structure, writing down your prompt information in a Word document so that it does not randomly disappear, and then copying and pasting that info into this form once your whole prompt structure is defined.")
- st.write("2. After you enter all of your prompt instructions, click 'Save' and give your file a name.")
- st.write("3. This file will be saved as a yaml configuration file in the `..VoucherVision/custom_prompts` folder.")
- st.write("4. When you go back to the main VoucherVision page you will now see your custom prompt available in the 'Prompt Version' dropdown menu.")
- st.write("5. The LLM ***only*** sees information from the 'instructions', 'rules', and 'json_formatting_instructions' sections. All other information is for versioning and integration with VoucherVisionEditor.")

- st.write("---")
- st.header('Load an Existing Prompt Template')
- st.write("By default, this form loads the minimum required transcription fields but does not provide rules for each field. You can also load an existing prompt as a template, editing or deleting values as needed.")

- dir_prompt = os.path.join(st.session_state.dir_home, 'custom_prompts')
- yaml_files = [f for f in os.listdir(dir_prompt) if f.endswith('.yaml')]
- col_load_text, col_load_btn, col_load_btn2 = st.columns([8,2,2])
- with col_load_text:
- # Dropdown for selecting a YAML file
- st.session_state['selected_yaml_file'] = st.selectbox('Select a prompt .YAML file to load:', [''] + yaml_files)
- with col_load_btn:
- st.write('##')
- # Button to load the selected prompt
- st.button('Load Prompt', on_click=btn_load_prompt, args=[st.session_state['selected_yaml_file'], dir_prompt],use_container_width=True)

- with col_load_btn2:
- if st.session_state['selected_yaml_file']:
- # Construct the full path to the file
- download_file_path = os.path.join(dir_prompt, st.session_state['selected_yaml_file'])
- # Create the download button
- st.write('##')
- create_download_button_yaml(download_file_path, st.session_state['selected_yaml_file'],key_val=345798)

- # Prompt Author Information
- st.write("---")
- st.header("Prompt Author Information")
- st.write("We value community contributions! Please provide your name(s) (or pseudonym if you prefer) for credit. If you leave this field blank, it will say 'unknown'.")
- if 'prompt_author' not in st.session_state:# != st.session_state['default_prompt_author']:
- st.session_state['prompt_author'] = st.text_input("Enter names of prompt author(s)", value=st.session_state['default_prompt_author'],key=1111)
- else:
- st.session_state['prompt_author'] = st.text_input("Enter names of prompt author(s)", value=st.session_state['prompt_author'],key=1112)

- # Institution
- st.write("Please provide your institution name. If you leave this field blank, it will say 'unknown'.")
- if 'prompt_author_institution' not in st.session_state:
- st.session_state['prompt_author_institution'] = st.text_input("Enter name of institution", value=st.session_state['default_prompt_author_institution'],key=1113)
- else:
- st.session_state['prompt_author_institution'] = st.text_input("Enter name of institution", value=st.session_state['prompt_author_institution'],key=1114)

- # Prompt name
- st.write("Please provide a simple name for your prompt. If you leave this field blank, it will say 'custom_prompt'.")
- if 'prompt_name' not in st.session_state:
- st.session_state['prompt_name'] = st.text_input("Enter prompt name", value=st.session_state['default_prompt_name'],key=1115)
- else:
- st.session_state['prompt_name'] = st.text_input("Enter prompt name", value=st.session_state['prompt_name'],key=1116)

- # Prompt version
- st.write("Please provide a version identifier for your prompt. If you leave this field blank, it will say 'v-1-0'.")
- if 'prompt_version' not in st.session_state:
- st.session_state['prompt_version'] = st.text_input("Enter prompt version", value=st.session_state['default_prompt_version'],key=1117)
- else:
- st.session_state['prompt_version'] = st.text_input("Enter prompt version", value=st.session_state['prompt_version'],key=1118)

- st.write("Please provide a description of your prompt and its intended task. Is it designed for a specific collection? Taxa? Database structure?")
- if 'prompt_description' not in st.session_state:
- st.session_state['prompt_description'] = st.text_input("Enter description of prompt", value=st.session_state['default_prompt_description'],key=1119)
- else:
- st.session_state['prompt_description'] = st.text_input("Enter description of prompt", value=st.session_state['prompt_description'],key=11111)

- st.write('---')
- st.header("Set LLM Model Type")
- # Define the options for the dropdown
- llm_options_general = ["General Purpose",
- "OpenAI GPT Models","Google PaLM2 Models","Google Gemini Models","MistralAI Models",]
- llm_options_all = ModelMaps.get_models_gui_list()

- if 'LLM' not in st.session_state:
- st.session_state['LLM'] = st.session_state['default_LLM']

- if st.session_state['LLM']:
- llm_options = llm_options_general + llm_options_all + [st.session_state['LLM']]
- else:
- llm_options = llm_options_general + llm_options_all
- # Create the dropdown and set the value to session_state['LLM']
- st.write("Which LLM is this prompt designed for? This will not restrict its use to a specific LLM, but some prompts will behave differently across models.")
- st.write("SLTPvA prompts have been validated with all supported LLMs, but performance may vary. If you design a prompt to work best with a specific model, then you can indicate the model here.")
- st.write("For general purpose prompts (like the SLTPvA prompts) just use the 'General Purpose' option.")
- st.session_state['LLM'] = st.selectbox('Set LLM', llm_options, index=llm_options.index(st.session_state.get('LLM', 'General Purpose')))
1208
- st.write('---')
1209
- # Instructions Section
1210
- st.header("Instructions")
1211
- st.write("These are the general instructions that guide the LLM through the transcription task. We recommend using the default instructions unless you have a specific reason to change them.")
1212
-
1213
- if 'instructions' not in st.session_state:
1214
- st.session_state['instructions'] = st.text_area("Enter guiding instructions", value=st.session_state['default_instructions'].strip(), height=350,key=111112)
1215
- else:
1216
- st.session_state['instructions'] = st.text_area("Enter guiding instructions", value=st.session_state['instructions'].strip(), height=350,key=111112)
1217
-
1218
-
1219
- st.write('---')
1220
-
1221
- # Column Instructions Section
1222
- st.header("JSON Formatting Instructions")
1223
- st.write("The following section tells the LLM how we want to structure the JSON dictionary. We do not recommend changing this section because it would likely result in unstable and inconsistent behavior.")
1224
- if 'json_formatting_instructions' not in st.session_state:
1225
- st.session_state['json_formatting_instructions'] = st.text_area("Enter general JSON guidelines", value=st.session_state['default_json_formatting_instructions'],key=111114)
1226
- else:
1227
- st.session_state['json_formatting_instructions'] = st.text_area("Enter general JSON guidelines", value=st.session_state['json_formatting_instructions'],key=111115)
1228
-
1229
-
1230
-
1231
-
1232
-
1233
-
1234
- st.write('---')
1235
- col_left, col_right = st.columns([6,4])
1236
-
1237
- null_value_rules = ''
1238
- c_name = "EXAMPLE_COLUMN_NAME"
1239
- c_value = "REPLACE WITH DESCRIPTION"
1240
-
1241
- with col_left:
1242
- st.subheader('Add/Edit Columns')
1243
- st.markdown("The pre-populated fields are REQUIRED for downstream validation steps. They must be in all prompts.")
1244
-
1245
- # Initialize rules in session state if not already present
1246
- if 'rules' not in st.session_state or not st.session_state['rules']:
1247
- for required_col in st.session_state['required_fields']:
1248
- st.session_state['rules'][required_col] = c_value
1249
-
1250
-
1251
-
1252
-
1253
- # Layout for adding a new column name
1254
- # col_text, col_textbtn = st.columns([8, 2])
1255
- # with col_text:
1256
- st.session_state['new_column_name'] = st.text_input("Enter a new column name:")
1257
- # with col_textbtn:
1258
- # st.write('##')
1259
- if st.button("Add New Column") and st.session_state['new_column_name']:
1260
- if st.session_state['new_column_name'] not in st.session_state['rules']:
1261
- st.session_state['rules'][st.session_state['new_column_name']] = c_value
1262
- st.success(f"New column '{st.session_state['new_column_name']}' added. Now you can edit its properties.")
1263
- st.session_state['new_column_name'] = ''
1264
- else:
1265
- st.error("Column name already exists. Please enter a unique column name.")
1266
- st.session_state['new_column_name'] = ''
1267
-

- # Get columns excluding the protected "catalogNumber"
- st.write('#')
- # required_columns = [col for col in st.session_state['rules'] if col not in st.session_state['required_fields']]
- editable_columns = [col for col in st.session_state['rules'] if col not in ["catalogNumber"]]
- removable_columns = [col for col in st.session_state['rules'] if col not in st.session_state['required_fields']]

- st.session_state['current_rule'] = st.selectbox("Select a column to edit:", [""] + editable_columns)
- # column_name = st.selectbox("Select a column to edit:", editable_columns)

- # if 'current_rule' not in st.session_state:
- # st.session_state['current_rule'] = current_rule

- # Form for input fields
- with st.form(key='rule_form'):
- # format_options = ["verbatim transcription", "spell check transcription", "boolean yes no", "boolean 1 0", "integer", "[list]", "yyyy-mm-dd"]
- # current_rule["format"] = st.selectbox("Format:", format_options, index=format_options.index(current_rule["format"]) if current_rule["format"] else 0)
- # current_rule["null_value"] = st.text_input("Null value:", value=current_rule["null_value"])
- if st.session_state['current_rule']:
- current_rule_description = st.text_area("Description of category:", value=st.session_state['rules'][st.session_state['current_rule']])
- else:
- current_rule_description = ''
- commit_button = st.form_submit_button("Commit Column")

- # default_rule = {
- # "format": format_options[0], # default format
- # "null_value": "", # default null value
- # "description": "", # default description
- # }
- # if st.session_state['current_rule'] != st.session_state['current_rule']:
- # # Column has changed. Update the session_state selected column.
- # st.session_state['current_rule'] = st.session_state['current_rule']
- # # Reset the current rule to the default for this new column, or a blank rule if not set.
- # current_rule = st.session_state['rules'][st.session_state['current_rule']].get(current_rule, c_value)

- # Handle commit action
- if commit_button and st.session_state['current_rule']:
- # Commit the rules to the session state.
- st.session_state['rules'][st.session_state['current_rule']] = current_rule_description
- st.success(f"Column '{st.session_state['current_rule']}' added/updated in rules.")

- # Force the form to reset by clearing the fields from the session state
- st.session_state.pop('current_rule', None) # Clear the selected column to force reset

- # st.session_state['rules'][column_name] = current_rule
- # st.success(f"Column '{column_name}' added/updated in rules.")

- # # Reset current_rule to default values for the next input
- # current_rule["format"] = default_rule["format"]
- # current_rule["null_value"] = default_rule["null_value"]
- # current_rule["description"] = default_rule["description"]

- # # To ensure that the form fields are reset, we can clear them from the session state
- # for key in current_rule.keys():
- # st.session_state[key] = default_rule[key]

- # Layout for removing an existing column
- # del_col, del_colbtn = st.columns([8, 2])
- # with del_col:
- delete_column_name = st.selectbox("Select a column to delete:", [""] + removable_columns)
- # with del_colbtn:
- # st.write('##')
- if st.button("Delete Column") and delete_column_name:
- del st.session_state['rules'][delete_column_name]
- st.success(f"Column '{delete_column_name}' removed from rules.")

- with col_right:
- # Display the current state of the JSON rules
- st.subheader('Formatted Columns')
- st.json(st.session_state['rules'])

- # st.subheader('All Prompt Info')
- # st.json(st.session_state['prompt_info'])

- st.write('---')

- col_left_mapping, col_right_mapping = st.columns([6,4])
- with col_left_mapping:
- st.header("Mapping")
- st.write("Assign each column name to a single category.")
- st.session_state['refresh_mapping'] = False

- # Dynamically create a list of all column names that can be assigned
- # This assumes that the column names are the keys in the dictionary under 'rules'
- all_column_names = list(st.session_state['rules'].keys())

- categories = ['TAXONOMY', 'GEOGRAPHY', 'LOCALITY', 'COLLECTING', 'MISC']
- if ('mapping' not in st.session_state) or (st.session_state['mapping'] == {}):
- st.session_state['mapping'] = {category: [] for category in categories}
- for category in categories:
- # Filter out the already assigned columns
- available_columns = [col for col in all_column_names if col not in st.session_state['assigned_columns'] or col in st.session_state['mapping'].get(category, [])]

- # Ensure the current mapping is a subset of the available options
- current_mapping = [col for col in st.session_state['mapping'].get(category, []) if col in available_columns]

- # Provide a safe default if the current mapping is empty or contains invalid options
- safe_default = current_mapping if all(col in available_columns for col in current_mapping) else []

- # Create a multi-select widget for the category with a safe default
- selected_columns = st.multiselect(
- f"Select columns for {category}:",
- available_columns,
- default=safe_default,
- key=f"mapping_{category}"
- )
- # Update the assigned_columns based on the selections
- for col in current_mapping:
- if col not in selected_columns and col in st.session_state['assigned_columns']:
- st.session_state['assigned_columns'].remove(col)
- st.session_state['refresh_mapping'] = True

- for col in selected_columns:
- if col not in st.session_state['assigned_columns']:
- st.session_state['assigned_columns'].append(col)
- st.session_state['refresh_mapping'] = True

- # Update the mapping in session state when there's a change
- st.session_state['mapping'][category] = selected_columns
- if st.session_state['refresh_mapping']:
- st.session_state['refresh_mapping'] = False

- # Button to confirm and save the mapping configuration
- if st.button('Confirm Mapping'):
- if check_unique_mapping_assignments():
- # Proceed with further actions since the mapping is confirmed and unique
- pass

- with col_right_mapping:
- # Display the current state of the JSON rules
- st.subheader('Formatted Column Maps')
- st.json(st.session_state['mapping'])

- col_left_save, col_right_save = st.columns([6,4])
- with col_left_save:
- # Input for new file name
- new_filename = st.text_input("Enter filename to save your prompt as a configuration YAML:",placeholder='my_prompt_name')
- # Button to save the new YAML file
- if st.button('Save YAML', type='primary'):
- if new_filename:
- if check_unique_mapping_assignments():
- if check_prompt_yaml_filename(new_filename):
- save_prompt_yaml(new_filename, col_left_save)
- else:
- st.error("File name can only contain letters, numbers, underscores, and dashes. Cannot contain spaces.")
- else:
- st.error("Mapping contains an error. Make sure that each column is assigned to only ***one*** category.")
- else:
- st.error("Please enter a filename.")

- if st.button('Exit'):
- st.session_state.proceed_to_build_llm_prompt = False
- st.session_state.proceed_to_main = True
- st.rerun()

- # st.write('---')
- # st.header("Save and Download Custom Prompt")
- # st.write('Once you click save, validation checks will verify the formatting and then a download button will appear so that you can ***save a local copy of your custom prompt.***')
- # col_left_save, col_right_save, _ = st.columns([2,2,8])
- # with col_left_save:
- # # Button to save the new YAML file
- # if st.button('Save YAML', type='primary',key=3450798):
- # if st.session_state['prompt_name']:
- # if check_unique_mapping_assignments():
- # if check_prompt_yaml_filename(st.session_state['prompt_name']):
- # save_prompt_yaml(st.session_state['prompt_name'], col_right_save)
- # else:
- # st.error("File name can only contain letters, numbers, underscores, and dashes. Cannot contain spaces.")
- # else:
- # st.error("Mapping contains an error. Make sure that each column is assigned to only ***one*** category.")
- # else:
- # st.error("Please enter a filename.")

- # with col_prompt_main_right:
- # st.subheader('All Prompt Components')
- # st.session_state['prompt_info'] = {
- # 'prompt_author': st.session_state['prompt_author'],
- # 'prompt_author_institution': st.session_state['prompt_author_institution'],
- # 'prompt_name': st.session_state['prompt_name'],
- # 'prompt_version': st.session_state['prompt_version'],
- # 'prompt_description': st.session_state['prompt_description'],
- # 'LLM': st.session_state['LLM'],
- # 'instructions': st.session_state['instructions'],
- # 'json_formatting_instructions': st.session_state['json_formatting_instructions'],
- # 'rules': st.session_state['rules'],
- # 'mapping': st.session_state['mapping'],
- # }
- # st.json(st.session_state['prompt_info'])
- with col_prompt_main_right:
- if st.session_state['user_clicked_load_prompt_yaml'] is None: # see if user has loaded a yaml to edit
- st.session_state['show_prompt_name_e'] = f"Prompt Status :arrow_forward: Building prompt from scratch"
- if st.session_state['prompt_name']:
- st.session_state['show_prompt_name_w'] = f"New Prompt Name :arrow_forward: {st.session_state['prompt_name']}.yaml"
- else:
- st.session_state['show_prompt_name_w'] = f"New Prompt Name :arrow_forward: [PLEASE SET NAME]"
- else:
- st.session_state['show_prompt_name_e'] = f"Prompt Status: Editing :arrow_forward: {st.session_state['selected_yaml_file']}"
- if st.session_state['prompt_name']:
- st.session_state['show_prompt_name_w'] = f"New Prompt Name :arrow_forward: {st.session_state['prompt_name']}.yaml"
- else:
- st.session_state['show_prompt_name_w'] = f"New Prompt Name :arrow_forward: [PLEASE SET NAME]"

- st.subheader(f'Full Prompt')
- st.write(st.session_state['show_prompt_name_e'])
- st.write(st.session_state['show_prompt_name_w'])
- st.write("---")
- st.session_state['prompt_info'] = {
- 'prompt_author': st.session_state['prompt_author'],
- 'prompt_author_institution': st.session_state['prompt_author_institution'],
- 'prompt_name': st.session_state['prompt_name'],
- 'prompt_version': st.session_state['prompt_version'],
- 'prompt_description': st.session_state['prompt_description'],
- 'LLM': st.session_state['LLM'],
- 'instructions': st.session_state['instructions'],
- 'json_formatting_instructions': st.session_state['json_formatting_instructions'],
- 'rules': st.session_state['rules'],
- 'mapping': st.session_state['mapping'],
- }
- st.json(st.session_state['prompt_info'])
 def show_header_welcome():
 st.session_state.logo_path = os.path.join(st.session_state.dir_home, 'img','logo.png')
 st.session_state.logo = Image.open(st.session_state.logo_path)
@@ -1676,7 +1297,7 @@ def content_header():
 with col_run_4:
 with st.expander("View Messages and Updates"):
 st.info("***Note:*** If you use VoucherVision frequently, you can change the default values that are auto-populated in the form below. In a text editor or IDE, edit the first few rows in the file `../VoucherVision/vouchervision/VoucherVision_Config_Builder.py`")

 col_test = st.container()

@@ -1686,13 +1307,6 @@ def content_header():
 col_json, col_json_WFO, col_json_GEO, col_json_map = st.columns([2, 2, 2, 2])

 with col_run_info_1:
- # Progress
- # Progress
- # st.subheader('Project')
- # bar = st.progress(0)
- # new_text = st.empty() # Placeholder for current step name
- # progress_report = ProgressReportVV(bar, new_text, n_images=10)
 # Progress
 overall_progress_bar = st.progress(0)
 text_overall = st.empty() # Placeholder for current step name
@@ -1700,23 +1314,14 @@ def content_header():
 batch_progress_bar = st.progress(0)
 text_batch = st.empty() # Placeholder for current step name
 progress_report = ProgressReport(overall_progress_bar, batch_progress_bar, text_overall, text_batch)
- # st.session_state['json_report'] = JSONReport(col_updates_1, col_json, col_json_WFO, col_json_GEO, col_json_map)
 st.session_state['hold_output'] = st.toggle('View Final Transcription')

 with col_logo:
 show_header_welcome()

 with col_run_1:
- # st.subheader('Run VoucherVision')
 N_STEPS = 6

- # if st.session_state.is_hf:
- # count_n_imgs = determine_n_images()
- # if count_n_imgs > 0:
- # st.session_state['processing_add_on'] = count_n_imgs
- # else:
- # st.session_state['processing_add_on'] = 0
 if check_if_usable(is_hf=st.session_state['is_hf']):
 b_text = f"Start Processing {st.session_state['processing_add_on']} Images" if st.session_state['processing_add_on'] > 1 else f"Start Processing {st.session_state['processing_add_on']} Image"
 if st.session_state['processing_add_on'] == 0:
@@ -1740,21 +1345,28 @@ def content_header():
 total_cost = 0.00
 n_failed_OCR = 0
 n_failed_LLM_calls = 0
- try:
- st.session_state['formatted_json'], st.session_state['formatted_json_WFO'], st.session_state['formatted_json_GEO'], total_cost, n_failed_OCR, n_failed_LLM_calls, st.session_state['zip_filepath'] = voucher_vision(None,
- st.session_state.dir_home,
- path_custom_prompts,
- None,
- progress_report,
- st.session_state['json_report'],
- path_api_cost=os.path.join(st.session_state.dir_home,'api_cost','api_cost.yaml'),
- is_hf = st.session_state['is_hf'],
- is_real_run=True)
- st.balloons()
- except Exception as e:
- with col_run_4:
- st.error(f"Transcription failed. Error: {e}")

 if n_failed_OCR > 0:
 with col_run_4:
@@ -1791,8 +1403,13 @@ def content_header():
 with ct_left:
 st.button("Refresh", on_click=refresh, use_container_width=True)
 with ct_right:
- if st.button('FAQs', use_container_width=True):
- pass

 # with col_run_2:
 # if st.button("Test GPT"):
@@ -1869,14 +1486,6 @@ def content_header():

 def content_project_settings(col):
 ### Project
 with col:
@@ -1966,9 +1575,10 @@ def content_prompt_and_llm_version():
 st.session_state.config['leafmachine']['project']['prompt_version'] = st.selectbox("Prompt Version", available_prompts, index=available_prompts.index(selected_version),label_visibility='collapsed')

 with col_prompt_2:
- if st.button("Build Custom LLM Prompt"):
- st.session_state.proceed_to_build_llm_prompt = True
- st.rerun()

 st.header('LLM Version')
 col_llm_1, col_llm_2 = st.columns([4,2])
@@ -2004,13 +1614,66 @@ def content_api_check():
 st.rerun()

- def content_collage_overlay():
 st.write("---")
- col_collage, col_overlay = st.columns([4,4])

 demo_text_h = f"Google_OCR_Handwriting:\nHERBARIUM OF MARCUS W. LYON , JR . Tracaulon sagittatum Indiana : Porter Co. incal Springs edge wet subdunal woods 1927 TX 11 Ilowers pink UNIVERSITE HERBARIUM MICH University of Michigan Herbarium 1439649 copyright reserved PERSICARIA FEB 2 6 1965 cm "
 demo_text_tr = f"trOCR:\nherbarium of marcus w. lyon jr. : : : tracaulon sagittatum indiana porter co. incal springs TX 11 Ilowers pink 1439649 copyright reserved D H U Q "
 demo_text_p = f"Google_OCR_Printed:\nTracaulon sagittatum Indiana : Porter Co. incal Springs edge wet subdunal woods 1927 Ilowers pink 1439649 copyright reserved PERSICARIA FEB 2 6 1965 cm "
@@ -2019,11 +1682,125 @@ def content_collage_overlay():
 demo_text_trh = demo_text_h + '\n' + demo_text_tr
 demo_text_trp = demo_text_p + '\n' + demo_text_tr

 with col_collage:
 st.header('LeafMachine2 Label Collage')
 default_crops = st.session_state.config['leafmachine']['cropped_components']['save_cropped_annotations']
 st.write("Prior to transcription, use LeafMachine2 to crop all labels from input images to create label collages for each specimen image. Showing just the text labels to the OCR algorithms significantly improves performance. This runs slowly on the free Hugging Face Space, but runs quickly with a fast CPU or any GPU.")
- st.session_state.config['leafmachine']['use_RGB_label_images'] = st.checkbox("Use LeafMachine2 label collage for transcriptions", st.session_state.config['leafmachine'].get('use_RGB_label_images', False))

 option_selected_crops = st.multiselect(label="Components to crop",
@@ -2040,76 +1817,14 @@ def content_collage_overlay():
 with st.expander(":frame_with_picture: View an example of the LeafMachine2 collage image"):
 st.image(st.session_state["demo_collage"], caption='LeafMachine2 Collage', output_format="PNG")
 # st.image(st.session_state["demo_collage"], caption='LeafMachine2 Collage', output_format="JPEG")

 with col_overlay:
 st.header('OCR Overlay Image')
- options = [":rainbow[Printed + Handwritten]", "Printed", "Use both models"]
- captions = [
- "Works well for both printed and handwritten text",
- "Works for printed text",
- "Adds both OCR versions to the LLM prompt"
- ]

 st.write('This will plot bounding boxes around all text that Google Vision was able to detect. If there are no boxes around text, then the OCR failed, so that missing text will not be seen by the LLM when it is creating the JSON object. The created image will be viewable in the VoucherVisionEditor.')

 do_create_OCR_helper_image = st.checkbox("Create image showing an overlay of the OCR detections",value=st.session_state.config['leafmachine']['do_create_OCR_helper_image'],disabled=True)
 st.session_state.config['leafmachine']['do_create_OCR_helper_image'] = do_create_OCR_helper_image

- # Get the current OCR option from session state
- OCR_option = st.session_state.config['leafmachine']['project']['OCR_option']
- # Map the OCR option to the index in options list
- # You need to define the mapping based on your application's logic
- option_to_index = {
- 'hand': 0,
- 'normal': 1,
- 'both': 2,
- }
- default_index = option_to_index.get(OCR_option, 0) # Default to 0 if option not found
- # Create the radio button
- OCR_option_select = st.radio(
- "Select the Google Vision OCR version.",
- options,
- index=default_index,
- help="",captions=captions,
- )
- st.session_state.config['leafmachine']['project']['OCR_option'] = OCR_option_select
- if OCR_option_select == ":rainbow[Printed + Handwritten]":
- OCR_option = 'hand'
- elif OCR_option_select == "Printed":
- OCR_option = 'normal'
- elif OCR_option_select == "Use both models":
- OCR_option = 'both'
- else:
- raise
- st.write("Supplement Google Vision OCR with trOCR (handwriting OCR) using `microsoft/trocr-base-handwritten`. This option requires Google Vision API and a GPU.")
- do_use_trOCR = st.checkbox("Enable trOCR", value=st.session_state.config['leafmachine']['project']['do_use_trOCR'])#,disabled=st.session_state['lacks_GPU'])
- st.session_state.config['leafmachine']['project']['do_use_trOCR'] = do_use_trOCR
- st.session_state.config['leafmachine']['project']['OCR_option'] = OCR_option
- st.markdown("Below is an example of what the LLM would see given the choice of OCR ensemble. One, two, or three versions of OCR can be fed into the LLM prompt. Typically, 'printed + handwritten' works well. If you have a GPU then you can enable trOCR.")
- if (OCR_option == 'hand') and not do_use_trOCR:
- st.text_area(label='Handwritten/Printed',placeholder=demo_text_h,disabled=True, label_visibility='visible', height=150)
- elif (OCR_option == 'normal') and not do_use_trOCR:
- st.text_area(label='Printed',placeholder=demo_text_p,disabled=True, label_visibility='visible', height=150)
- elif (OCR_option == 'both') and not do_use_trOCR:
- st.text_area(label='Handwritten/Printed + Printed',placeholder=demo_text_b,disabled=True, label_visibility='visible', height=150)
- elif (OCR_option == 'both') and do_use_trOCR:
- st.text_area(label='Handwritten/Printed + Printed + trOCR',placeholder=demo_text_trb,disabled=True, label_visibility='visible', height=150)
- elif (OCR_option == 'normal') and do_use_trOCR:
- st.text_area(label='Printed + trOCR',placeholder=demo_text_trp,disabled=True, label_visibility='visible', height=150)
- elif (OCR_option == 'hand') and do_use_trOCR:
- st.text_area(label='Handwritten/Printed + trOCR',placeholder=demo_text_trh,disabled=True, label_visibility='visible', height=150)

 if "demo_overlay" not in st.session_state:
 # ocr = os.path.join(st.session_state.dir_home,'demo', 'ba','ocr.png')
@@ -2159,6 +1874,8 @@ def content_processing_options():
 st.subheader('Compute Options')
 st.session_state.config['leafmachine']['project']['num_workers'] = st.number_input("Number of CPU workers", value=st.session_state.config['leafmachine']['project'].get('num_workers', 1), disabled=False)
 st.session_state.config['leafmachine']['project']['batch_size'] = st.number_input("Batch size", value=st.session_state.config['leafmachine']['project'].get('batch_size', 500), help='Sets the batch size for the LeafMachine2 cropping. If computer RAM is filled, lower this value to ~100.')

 with col_processing_2:
 st.subheader('Filename Prefix Handling')
 st.session_state.config['leafmachine']['project']['prefix_removal'] = st.text_input("Remove prefix from catalog number", st.session_state.config['leafmachine']['project'].get('prefix_removal', ''),placeholder="e.g. MICH-V-")
@@ -2167,18 +1884,21 @@ def content_processing_options():

 ### Logging and Image Validation - col_v1
 st.write("---")
- st.header('Logging and Image Validation')
 col_v1, col_v2 = st.columns(2)

 with col_v1:
 option_check_illegal = st.checkbox("Check for illegal filenames", value=st.session_state.config['leafmachine']['do']['check_for_illegal_filenames'])
 st.session_state.config['leafmachine']['do']['check_for_illegal_filenames'] = option_check_illegal
 st.session_state.config['leafmachine']['do']['check_for_corrupt_images_make_vertical'] = st.checkbox("Check for corrupt images", st.session_state.config['leafmachine']['do'].get('check_for_corrupt_images_make_vertical', True),disabled=True)

 st.session_state.config['leafmachine']['print']['verbose'] = st.checkbox("Print verbose", st.session_state.config['leafmachine']['print'].get('verbose', True))
 st.session_state.config['leafmachine']['print']['optional_warnings'] = st.checkbox("Show optional warnings", st.session_state.config['leafmachine']['print'].get('optional_warnings', True))
- with col_v2:
 log_level = st.session_state.config['leafmachine']['logging'].get('log_level', None)
 log_level_display = log_level if log_level is not None else 'default'
 selected_log_level = st.selectbox("Logging Level", ['default', 'DEBUG', 'INFO', 'WARNING', 'ERROR'], index=['default', 'DEBUG', 'INFO', 'WARNING', 'ERROR'].index(log_level_display))
@@ -2188,6 +1908,28 @@ def content_processing_options():
 else:
 st.session_state.config['leafmachine']['logging']['log_level'] = selected_log_level

 def content_tab_domain():
@@ -2254,7 +1996,9 @@ def render_expense_report_summary():
 expense_report = st.session_state.expense_report
 st.header('Expense Report Summary')

- if expense_summary:
 st.metric(label="Total Cost", value=f"${round(expense_summary['total_cost_sum'], 4):,}")
 col1, col2 = st.columns(2)

@@ -2348,19 +2092,21 @@ def render_expense_report_summary():
 pie_chart.update_traces(marker=dict(colors=colors),)
 st.plotly_chart(pie_chart, use_container_width=True)

- else:
- st.error('No expense report data available.')

 def content_less_used():
 st.write('---')
 st.write(':octagonal_sign: ***NOTE:*** Settings below are not relevant for most projects. Some settings below may not be reflected in saved settings files and would need to be set each time.')

 #################################################################################################################################################
 # Sidebar #######################################################################################################################################
 #################################################################################################################################################
 def sidebar_content():
 if not os.path.exists(os.path.join(st.session_state.dir_home,'expense_report')):
 validate_dir(os.path.join(st.session_state.dir_home,'expense_report'))
 expense_report_path = os.path.join(st.session_state.dir_home, 'expense_report', 'expense_report.csv')
@@ -2377,7 +2123,6 @@ def sidebar_content():
 st.write('Available after first run...')

 #################################################################################################################################################
 # Routing Function ##############################################################################################################################
 #################################################################################################################################################
@@ -2387,28 +2132,20 @@ def main():
 sidebar_content()
 # Main App
 content_header()

 col_input, col_gallery = st.columns([4,8])
 content_project_settings(col_input)
 content_input_images(col_input, col_gallery)

- # if st.session_state['is_hf']:
- # content_project_settings()
- # content_input_images_hf()
- # else:
- # col1, col2 = st.columns([1,1])
- # with col1:
- # content_project_settings()
- # with col2:
- # content_input_images()

 col3, col4 = st.columns([1,1])
 with col3:
 content_prompt_and_llm_version()
 with col4:
 content_api_check()
 content_collage_overlay()
 content_llm_cost()
 content_processing_options()
@@ -2418,155 +2155,20 @@ def main():
 content_space_saver()

- #################################################################################################################################################
- # Initializations ###############################################################################################################################
- #################################################################################################################################################

- if st.session_state['is_hf']:
- if 'proceed_to_main' not in st.session_state:
- st.session_state.proceed_to_main = True
- if 'proceed_to_private' not in st.session_state:
- st.session_state.proceed_to_private = False
- if 'private_file' not in st.session_state:
- st.session_state.private_file = True
- else:
- if 'proceed_to_main' not in st.session_state:
- st.session_state.proceed_to_main = False # New state variable to control the flow
- if 'private_file' not in st.session_state:
- st.session_state.private_file = does_private_file_exist()
- if st.session_state.private_file:
- st.session_state.proceed_to_main = True
- if 'proceed_to_private' not in st.session_state:
- st.session_state.proceed_to_private = False # New state variable to control the flow

- if 'proceed_to_build_llm_prompt' not in st.session_state:
- st.session_state.proceed_to_build_llm_prompt = False # New state variable to control the flow

- if 'processing_add_on' not in st.session_state:
- st.session_state['processing_add_on'] = 0

- if 'formatted_json' not in st.session_state:
- st.session_state['formatted_json'] = None
- if 'formatted_json_WFO' not in st.session_state:
- st.session_state['formatted_json_WFO'] = None
- if 'formatted_json_GEO' not in st.session_state:
- st.session_state['formatted_json_GEO'] = None

- if 'lacks_GPU' not in st.session_state:
- st.session_state['lacks_GPU'] = not torch.cuda.is_available()

- if 'API_key_validation' not in st.session_state:
- st.session_state['API_key_validation'] = False
- if 'present_annotations' not in st.session_state:
- st.session_state['present_annotations'] = None
- if 'missing_annotations' not in st.session_state:
- st.session_state['missing_annotations'] = None
- if 'date_of_check' not in st.session_state:
- st.session_state['date_of_check'] = None
- if 'API_checked' not in st.session_state:
- st.session_state['API_checked'] = False
- if 'API_rechecked' not in st.session_state:
- st.session_state['API_rechecked'] = False

- if 'json_report' not in st.session_state:
- st.session_state['json_report'] = False
- if 'hold_output' not in st.session_state:
- st.session_state['hold_output'] = False

- if 'cost_openai' not in st.session_state:
- st.session_state['cost_openai'] = None
- if 'cost_azure' not in st.session_state:
- st.session_state['cost_azure'] = None
- if 'cost_google' not in st.session_state:
- st.session_state['cost_google'] = None
- if 'cost_mistral' not in st.session_state:
- st.session_state['cost_mistral'] = None
- if 'cost_local' not in st.session_state:
- st.session_state['cost_local'] = None

- if 'settings_filename' not in st.session_state:
- st.session_state['settings_filename'] = None
- if 'loaded_settings_filename' not in st.session_state:
- st.session_state['loaded_settings_filename'] = None
- if 'zip_filepath' not in st.session_state:
- st.session_state['zip_filepath'] = None

- # Initialize session_state variables if they don't exist
- if 'prompt_info' not in st.session_state:
- st.session_state['prompt_info'] = {}
- if 'rules' not in st.session_state:
- st.session_state['rules'] = {}

- # These are the fields that are in SLTPvA that are not required by another parsing validation function:
- # "identifiedBy": "M.W. Lyon, Jr.",
- # "recordedBy": "University of Michigan Herbarium",
- # "recordNumber": "",
- # "habitat": "wet subdunal woods",
- # "occurrenceRemarks": "Indiana : Porter Co.",
- # "degreeOfEstablishment": "",
- # "minimumElevationInMeters": "",
- # "maximumElevationInMeters": ""
- if 'required_fields' not in st.session_state:
- st.session_state['required_fields'] = ['catalogNumber','order','family','scientificName',
- 'scientificNameAuthorship','genus','subgenus','specificEpithet','infraspecificEpithet',
- 'verbatimEventDate','eventDate',
- 'country','stateProvince','county','municipality','locality','decimalLatitude','decimalLongitude','verbatimCoordinates',]
2543
-
2544
-
2545
- if 'proceed_to_build_llm_prompt' not in st.session_state:
2546
- st.session_state.proceed_to_build_llm_prompt = False
2547
- if 'proceed_to_component_detector' not in st.session_state:
2548
- st.session_state.proceed_to_component_detector = False
2549
- if 'proceed_to_parsing_options' not in st.session_state:
2550
- st.session_state.proceed_to_parsing_options = False
2551
- if 'proceed_to_api_keys' not in st.session_state:
2552
- st.session_state.proceed_to_api_keys = False
2553
- if 'proceed_to_space_saver' not in st.session_state:
2554
- st.session_state.proceed_to_space_saver = False
2555
-
2556
-
2557
 #################################################################################################################################################
 # Main ##########################################################################################################################################
 #################################################################################################################################################
 if st.session_state['is_hf']:
-    if st.session_state.proceed_to_build_llm_prompt:
-        build_LLM_prompt_config()
-    elif st.session_state.proceed_to_main:
         main()
 
 else:
     if not st.session_state.private_file:
         create_private_file()
-    elif st.session_state.proceed_to_build_llm_prompt:
-        build_LLM_prompt_config()
     elif st.session_state.proceed_to_private and not st.session_state['is_hf']:
         create_private_file()
     elif st.session_state.proceed_to_main:
 import yaml, os, json, random, time, re, torch, random, warnings, shutil, sys
 import seaborn as sns
 import plotly.graph_objs as go
 from PIL import Image
 import pandas as pd
 from io import BytesIO
 from vouchervision.general_utils import test_GPU, get_cfg_from_full_path, summarize_expense_report, validate_dir
 from vouchervision.model_maps import ModelMaps
 from vouchervision.API_validation import APIvalidation
+from vouchervision.utils_hf import setup_streamlit_config, save_uploaded_file, save_uploaded_local
+from vouchervision.data_project import convert_pdf_to_jpg
+from vouchervision.utils_LLM import check_system_gpus
 
 
 #################################################################################################################################################
 # Initializations ###############################################################################################################################
 #################################################################################################################################################
+st.set_page_config(layout="wide", page_icon='img/icon.ico', page_title='VoucherVision', initial_sidebar_state="collapsed")
 
 # Parse the 'is_hf' argument and set it in session state
 if 'is_hf' not in st.session_state:
+    try:
+        # Treat '1', 'true', or 'True' as enabling Hugging Face mode; any other value means local mode
+        is_hf_os = os.getenv('IS_HF')
+        if is_hf_os is not None and is_hf_os.strip().lower() in ('1', 'true'):
+            st.session_state['is_hf'] = True
+        else:
+            st.session_state['is_hf'] = False
+    except:
+        st.session_state['is_hf'] = False
+print(f"is_hf {st.session_state['is_hf']}")
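The flag can be exercised locally before launching the app; a small illustrative check (the variable name `IS_HF` comes from the code above, and the accepted values follow the corrected condition):

    import os

    os.environ['IS_HF'] = 'true'   # simulate the Hugging Face Space environment
    is_hf_os = os.getenv('IS_HF')
    assert is_hf_os is not None and is_hf_os.strip().lower() in ('1', 'true')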
 
 # Default YAML file path
 if 'config' not in st.session_state:
     st.session_state.config, st.session_state.dir_home = build_VV_config(loaded_cfg=None)
     setup_streamlit_config(st.session_state.dir_home)
 
+########################################################################################################
+### Global constants ####
+########################################################################################################
+MAX_GALLERY_IMAGES = 20
+GALLERY_IMAGE_SIZE = 96
+
+
+########################################################################################################
+### Init funcs ####
+########################################################################################################
+def does_private_file_exist():
+    dir_home = os.path.dirname(__file__)
+    path_cfg_private = os.path.join(dir_home, 'PRIVATE_DATA.yaml')
+    return os.path.exists(path_cfg_private)
+
+
+########################################################################################################
+### Streamlit inits [FOR SAVE FILE] ####
+########################################################################################################
+
+
+########################################################################################################
+### Streamlit inits [routing] ####
+########################################################################################################
+if st.session_state['is_hf']:
+    if 'proceed_to_main' not in st.session_state:
+        st.session_state.proceed_to_main = True
+
+    if 'proceed_to_private' not in st.session_state:
+        st.session_state.proceed_to_private = False
+
+    if 'private_file' not in st.session_state:
+        st.session_state.private_file = True
+else:
+    if 'proceed_to_main' not in st.session_state:
+        st.session_state.proceed_to_main = False  # New state variable to control the flow
+
+    if 'private_file' not in st.session_state:
+        st.session_state.private_file = does_private_file_exist()
+        if st.session_state.private_file:
+            st.session_state.proceed_to_main = True
+
+    if 'proceed_to_private' not in st.session_state:
+        st.session_state.proceed_to_private = False  # New state variable to control the flow
+
+
+if 'proceed_to_build_llm_prompt' not in st.session_state:
+    st.session_state.proceed_to_build_llm_prompt = False  # New state variable to control the flow
+if 'proceed_to_component_detector' not in st.session_state:
+    st.session_state.proceed_to_component_detector = False
+if 'proceed_to_parsing_options' not in st.session_state:
+    st.session_state.proceed_to_parsing_options = False
+if 'proceed_to_api_keys' not in st.session_state:
+    st.session_state.proceed_to_api_keys = False
+if 'proceed_to_space_saver' not in st.session_state:
+    st.session_state.proceed_to_space_saver = False
+if 'proceed_to_faqs' not in st.session_state:
+    st.session_state.proceed_to_faqs = False
+
+
+########################################################################################################
+### Streamlit inits [basics] ####
+########################################################################################################
+if 'processing_add_on' not in st.session_state:
+    st.session_state['processing_add_on'] = 0
+
+
+if 'capability_score' not in st.session_state:
+    st.session_state['num_gpus'], st.session_state['gpu_dict'], st.session_state['total_vram_gb'], st.session_state['capability_score'] = check_system_gpus()
+
+
+if 'formatted_json' not in st.session_state:
+    st.session_state['formatted_json'] = None
+if 'formatted_json_WFO' not in st.session_state:
+    st.session_state['formatted_json_WFO'] = None
+if 'formatted_json_GEO' not in st.session_state:
+    st.session_state['formatted_json_GEO'] = None
+
+
+if 'lacks_GPU' not in st.session_state:
+    st.session_state['lacks_GPU'] = not torch.cuda.is_available()
+
+
+if 'API_key_validation' not in st.session_state:
+    st.session_state['API_key_validation'] = False
+if 'API_checked' not in st.session_state:
+    st.session_state['API_checked'] = False
+if 'API_rechecked' not in st.session_state:
+    st.session_state['API_rechecked'] = False
+
+
+if 'present_annotations' not in st.session_state:
+    st.session_state['present_annotations'] = None
+if 'missing_annotations' not in st.session_state:
+    st.session_state['missing_annotations'] = None
+if 'date_of_check' not in st.session_state:
+    st.session_state['date_of_check'] = None
+
+
+if 'json_report' not in st.session_state:
+    st.session_state['json_report'] = False
+if 'hold_output' not in st.session_state:
+    st.session_state['hold_output'] = False
+
+
+if 'cost_openai' not in st.session_state:
+    st.session_state['cost_openai'] = None
+if 'cost_azure' not in st.session_state:
+    st.session_state['cost_azure'] = None
+if 'cost_google' not in st.session_state:
+    st.session_state['cost_google'] = None
+if 'cost_mistral' not in st.session_state:
+    st.session_state['cost_mistral'] = None
+if 'cost_local' not in st.session_state:
+    st.session_state['cost_local'] = None
+
+
+if 'settings_filename' not in st.session_state:
+    st.session_state['settings_filename'] = None
+if 'loaded_settings_filename' not in st.session_state:
+    st.session_state['loaded_settings_filename'] = None
+if 'zip_filepath' not in st.session_state:
+    st.session_state['zip_filepath'] = None
+
+
+########################################################################################################
+### Streamlit inits [prompt builder] ####
+########################################################################################################
+# These are the fields that are in SLTPvA that are not required by another parsing validation function:
+#    "identifiedBy": "M.W. Lyon, Jr.",
+#    "recordedBy": "University of Michigan Herbarium",
+#    "recordNumber": "",
+#    "habitat": "wet subdunal woods",
+#    "occurrenceRemarks": "Indiana : Porter Co.",
+#    "degreeOfEstablishment": "",
+#    "minimumElevationInMeters": "",
+#    "maximumElevationInMeters": ""
+if 'required_fields' not in st.session_state:
+    st.session_state['required_fields'] = ['catalogNumber','order','family','scientificName',
+                                           'scientificNameAuthorship','genus','subgenus','specificEpithet','infraspecificEpithet',
+                                           'verbatimEventDate','eventDate',
+                                           'country','stateProvince','county','municipality','locality','decimalLatitude','decimalLongitude','verbatimCoordinates',]
+if 'prompt_info' not in st.session_state:
+    st.session_state['prompt_info'] = {}
+if 'rules' not in st.session_state:
+    st.session_state['rules'] = {}
+
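A hedged sketch of how a downstream parser might use `required_fields` to guarantee every key exists in a transcribed record; the helper name is hypothetical, not part of VoucherVision:

    def ensure_required_fields(record, required_fields):
        # Fill any missing required key with an empty string, as the prompt rules permit
        for key in required_fields:
            record.setdefault(key, "")
        return record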
+
+########################################################################################################
+### Streamlit inits [gallery] ####
+########################################################################################################
 if 'uploader_idk' not in st.session_state:
     st.session_state['uploader_idk'] = 1
 if 'input_list_small' not in st.session_state:
 
     st.session_state['dir_uploaded_images_small'] = os.path.join(st.session_state.dir_home,'uploads_small')
     validate_dir(os.path.join(st.session_state.dir_home,'uploads_small'))
 
 
+########################################################################################################
+### CONTENT [] ####
+########################################################################################################
 def content_input_images(col_left, col_right):
     st.write('---')
     # col1, col2 = st.columns([2,8])
 
     if st.session_state.is_hf:
         st.session_state['dir_uploaded_images'] = os.path.join(st.session_state.dir_home,'uploads')
         st.session_state['dir_uploaded_images_small'] = os.path.join(st.session_state.dir_home,'uploads_small')
+        uploaded_files = st.file_uploader("Upload Images", type=['jpg', 'jpeg', 'pdf'], accept_multiple_files=True, key=st.session_state['uploader_idk'])
         st.button("Use Test Image", help="This will clear any uploaded images and load the 1 provided test image.", on_click=use_test_image)
 
     with col_right:
 
         # Clear input image gallery and input list
         clear_image_gallery()
 
         for uploaded_file in uploaded_files:
+            # Determine the file type
+            if uploaded_file.name.lower().endswith('.pdf'):
+                # Handle PDF files
+                file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
+                # Convert each page of the PDF to an image
+                n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=st.session_state.config['leafmachine']['project']['pdf_conversion_dpi'])
+                # Update the input list for each page image
+                converted_files = os.listdir(st.session_state['dir_uploaded_images'])
+
+                for file_name in converted_files:
+                    if file_name.lower().endswith('.jpg'):
+                        jpg_file_path = os.path.join(st.session_state['dir_uploaded_images'], file_name)
+                        st.session_state['input_list'].append(jpg_file_path)
+
+                        # Optionally, create a thumbnail for the gallery
+                        img = Image.open(jpg_file_path)
+                        img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
+                        file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
+                        st.session_state['input_list_small'].append(file_path_small)
+            else:
+                # Handle JPG/JPEG files (existing process)
+                file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
+                st.session_state['input_list'].append(file_path)
+                img = Image.open(file_path)
+                img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
+                file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
+                st.session_state['input_list_small'].append(file_path_small)
+
+        # After processing all files
+        st.info(f"Processing images from {st.session_state.config['leafmachine']['project']['dir_images_local']}")
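`convert_pdf_to_jpg` is imported from `vouchervision.data_project`; a minimal sketch of what such a conversion can look like with PyMuPDF (which `install_dependencies.sh` installs). The function name and return value here are illustrative assumptions, not the actual implementation:

    import os
    import fitz  # PyMuPDF

    def pdf_pages_to_jpgs(pdf_path, dir_out, dpi=100):
        # Render each page at the requested DPI (PDF user space is 72 points per inch) and save it as a JPG
        doc = fitz.open(pdf_path)
        zoom = dpi / 72
        stem = os.path.splitext(os.path.basename(pdf_path))[0]
        for page in doc:
            pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
            pix.save(os.path.join(dir_out, f"{stem}_page_{page.number + 1}.jpg"))
        return doc.page_count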
 
         if st.session_state['input_list_small']:
             if len(st.session_state['input_list_small']) > MAX_GALLERY_IMAGES:
 
     st.session_state['dir_images_local_TEMP'] = st.session_state.config['leafmachine']['project']['dir_images_local']
     print("rerun")
     st.rerun()
 
 
 def list_jpg_files(directory_path):
     jpg_count = 0
 
     st.session_state['input_list_small'].append(file_path_small)
 
 def refresh():
     st.session_state['uploader_idk'] += 1
     st.write('')
+
+
 # def display_image_gallery():
 #     # Initialize the container
 #     con_image = st.empty()
 
+
 # st.session_state.private_file = does_private_file_exist()
 
 # Function to load a YAML file and update session_state
+
 
 ### Updated to match HF version
 # def save_prompt_yaml(filename):
 
 def show_header_welcome():
     st.session_state.logo_path = os.path.join(st.session_state.dir_home, 'img','logo.png')
     st.session_state.logo = Image.open(st.session_state.logo_path)
 
     with col_run_4:
         with st.expander("View Messages and Updates"):
             st.info("***Note:*** If you use VoucherVision frequently, you can change the default values that are auto-populated in the form below. In a text editor or IDE, edit the first few rows in the file `../VoucherVision/vouchervision/VoucherVision_Config_Builder.py`")
+            st.info("Please enable the LeafMachine2 collage for full-sized images of herbarium vouchers; you will get better results!")
 
     col_test = st.container()
 
     col_json, col_json_WFO, col_json_GEO, col_json_map = st.columns([2, 2, 2, 2])
 
     with col_run_info_1:
         # Progress
         overall_progress_bar = st.progress(0)
         text_overall = st.empty()  # Placeholder for current step name
         batch_progress_bar = st.progress(0)
         text_batch = st.empty()  # Placeholder for current step name
         progress_report = ProgressReport(overall_progress_bar, batch_progress_bar, text_overall, text_batch)
         st.session_state['hold_output'] = st.toggle('View Final Transcription')
 
     with col_logo:
         show_header_welcome()
 
     with col_run_1:
         N_STEPS = 6
 
         if check_if_usable(is_hf=st.session_state['is_hf']):
             b_text = f"Start Processing {st.session_state['processing_add_on']} Images" if st.session_state['processing_add_on'] > 1 else f"Start Processing {st.session_state['processing_add_on']} Image"
             if st.session_state['processing_add_on'] == 0:
                     total_cost = 0.00
                     n_failed_OCR = 0
                     n_failed_LLM_calls = 0
+                    # try:
+                    voucher_vision_output = voucher_vision(None,
+                                                           st.session_state.dir_home,
+                                                           path_custom_prompts,
+                                                           None,
+                                                           progress_report,
+                                                           st.session_state['json_report'],
+                                                           path_api_cost=os.path.join(st.session_state.dir_home,'api_cost','api_cost.yaml'),
+                                                           is_hf=st.session_state['is_hf'],
+                                                           is_real_run=True)
+                    st.session_state['formatted_json'] = voucher_vision_output['last_JSON_response']
+                    st.session_state['formatted_json_WFO'] = voucher_vision_output['final_WFO_record']
+                    st.session_state['formatted_json_GEO'] = voucher_vision_output['final_GEO_record']
+                    total_cost = voucher_vision_output['total_cost']
+                    n_failed_OCR = voucher_vision_output['n_failed_OCR']
+                    n_failed_LLM_calls = voucher_vision_output['n_failed_LLM_calls']
+                    st.session_state['zip_filepath'] = voucher_vision_output['zip_filepath']
+                    # st.balloons()
+
+                    # except Exception as e:
+                    #     with col_run_4:
+                    #         st.error(f"Transcription failed. Error: {e}")
 
                     if n_failed_OCR > 0:
                         with col_run_4:
 
             with ct_left:
                 st.button("Refresh", on_click=refresh, use_container_width=True)
             with ct_right:
+                # st.page_link(os.path.join(os.path.dirname(__file__),"pages","faqs.py"), label="FAQs", icon="❔")
+                st.page_link(os.path.join("pages","faqs.py"), label="FAQs", icon="❔")
+
+                # if st.button('FAQs', use_container_width=True):
+                #     st.session_state.proceed_to_faqs = True
+                #     st.session_state.proceed_to_main = False
+                #     st.rerun()
 
         # with col_run_2:
         #     if st.button("Test GPT"):
 
 def content_project_settings(col):
     ### Project
     with col:
 
         st.session_state.config['leafmachine']['project']['prompt_version'] = st.selectbox("Prompt Version", available_prompts, index=available_prompts.index(selected_version), label_visibility='collapsed')
 
     with col_prompt_2:
+        # if st.button("Build Custom LLM Prompt"):
+        #     st.page_link(os.path.join(os.path.dirname(__file__),"pages","prompt_builder.py"), label="Prompt Builder", icon="🚧")
+        st.page_link(os.path.join("pages","prompt_builder.py"), label="Prompt Builder", icon="🚧")
 
     st.header('LLM Version')
     col_llm_1, col_llm_2 = st.columns([4,2])
 
     st.rerun()
 
 
+def adjust_ocr_options_based_on_capability(capability_score):
+    llava_models_requirements = {
+        "liuhaotian/llava-v1.6-mistral-7b": {"full": 18, "4bit": 9},
+        "liuhaotian/llava-v1.6-34b": {"full": 70, "4bit": 25},
+        "liuhaotian/llava-v1.6-vicuna-13b": {"full": 33, "4bit": 15},
+        "liuhaotian/llava-v1.6-vicuna-7b": {"full": 20, "4bit": 10},
+    }
+    if capability_score == 'no_gpu':
+        return False
+    else:
+        capability_score_n = int(capability_score.split("_")[1].split("GB")[0])
+        supported_models = [model for model, reqs in llava_models_requirements.items()
+                            if reqs["full"] <= capability_score_n or reqs["4bit"] <= capability_score_n]
+
+        # If no models are supported, disable the LLaVA option
+        if not supported_models:
+            # Assuming the LLaVA option is the last in your list
+            return False  # Indicate LLaVA is not supported
+        return True  # Indicate LLaVA is supported
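Illustrative calls, assuming the capability score string produced by `check_system_gpus()` embeds the VRAM like `'class_16GB'` (only the split on `'_'` and `'GB'` is certain from the code above; the prefix is a guess):

    adjust_ocr_options_based_on_capability('no_gpu')      # False: no GPU present
    adjust_ocr_options_based_on_capability('class_8GB')   # False: below even the 9 GB 4-bit requirement
    adjust_ocr_options_based_on_capability('class_16GB')  # True: llava-v1.6-mistral-7b fits in 4-bit (9 GB)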
+
+
+def content_ocr_method():
     st.write("---")
+    st.header('OCR Methods')
+    with st.expander("Read about available OCR methods"):
+        st.subheader("Overview")
+        st.markdown("""VoucherVision can use the `Google Vision API`, `CRAFT` text detection + `trOCR`, and all `LLaVA v1.6` models.
+                    VoucherVision sends the OCR inside of the LLM prompt. We have found that sending multiple copies, or multiple versions, of
+                    the OCR text to the LLM helps the LLM maintain focus on the OCR text -- our prompts are quite long and the OCR text is relatively short.
+                    Below you can choose the OCR method(s). You can 'stack' all of the methods if you want, which may improve results because
+                    different OCR methods have different strengths, giving the LLM more information to work with. Alternatively, you can select a single method and
+                    send 2 copies to the LLM by enabling that option below.""")
+        st.subheader("Google Vision API")
+        st.markdown("""`Google Vision API` provides several OCR methods. We use the `document_text_detection()` service, designed to handle dense text blocks.
+                    The `Handwritten` option CAN also be used for printed and mixed labels, but it is also optimized for handwriting. `Handwritten` uses the Google Vision Beta service.
+                    This is the recommended default OCR method. `Printed` uses the regular Google Vision service and works well for general use.
+                    You can also supplement Google Vision OCR by enabling trOCR, which is optimized for handwriting. trOCR requires segmented word images, which are provided as part
+                    of the Google Vision metadata. trOCR does not require a GPU, but it runs *much* faster with a GPU.""")
+        st.subheader("LLaVA")
+        st.markdown("""`LLaVA` can replace the Google Vision APIs. It requires the use of the LeafMachine2 collage, or images that are majority text. It may struggle with very
+                    long texts. LLaVA models are multimodal, meaning that we can upload the image and the model will transcribe (and even parse) the text all at once. With VoucherVision, we
+                    support 4 different LLaVA models of varying sizes; some are much more capable than others. These models tend to outperform all other OCR methods for handwriting.
+                    LLaVA models are run locally and require powerful GPUs to implement. While LLaVA models are capable of handling both the OCR and text parsing tasks all in one step,
+                    this option only uses LLaVA to transcribe all of the text in the image and still uses a separate LLM to parse text into categories.""")
+        st.subheader("CRAFT + trOCR")
+        st.markdown("""This pairing can replace the Google Vision APIs and is computationally lighter than LLaVA. `CRAFT` locates text, segments lines of text, and feeds the segmentations
+                    to the `trOCR` transformer model. This pairing requires at least an 8 GB GPU. trOCR is a Microsoft model optimized for handwriting. The base model is not as accurate as
+                    LLaVA or Google Vision, but if you have a trOCR-based model, let us know and we will add support.""")
+
+    c1, c2 = st.columns([4,4])
+
+    # Check if LLaVA models are supported based on the capability score
+    llava_supported = adjust_ocr_options_based_on_capability(st.session_state.capability_score)
+    if llava_supported:
+        st.success("LLaVA models are supported on this computer")
+    else:
+        st.warning("LLaVA models are NOT supported on this computer. Requires a GPU with at least 12 GB of VRAM.")
+
     demo_text_h = f"Google_OCR_Handwriting:\nHERBARIUM OF MARCUS W. LYON , JR . Tracaulon sagittatum Indiana : Porter Co. incal Springs edge wet subdunal woods 1927 TX 11 Ilowers pink UNIVERSITE HERBARIUM MICH University of Michigan Herbarium 1439649 copyright reserved PERSICARIA FEB 2 6 1965 cm "
     demo_text_tr = f"trOCR:\nherbarium of marcus w. lyon jr. : : : tracaulon sagittatum indiana porter co. incal springs TX 11 Ilowers pink 1439649 copyright reserved D H U Q "
     demo_text_p = f"Google_OCR_Printed:\nTracaulon sagittatum Indiana : Porter Co. incal Springs edge wet subdunal woods 1927 Ilowers pink 1439649 copyright reserved PERSICARIA FEB 2 6 1965 cm "
     demo_text_trh = demo_text_h + '\n' + demo_text_tr
     demo_text_trp = demo_text_p + '\n' + demo_text_tr
 
+    options = ["Google Vision Handwritten", "Google Vision Printed", "CRAFT + trOCR", "LLaVA"]
+    options_llava = ["llava-v1.6-mistral-7b", "llava-v1.6-34b", "llava-v1.6-vicuna-13b", "llava-v1.6-vicuna-7b",]
+    options_llava_bit = ["full", "4bit",]
+    captions_llava = [
+        "Full Model: 18 GB VRAM, 4-bit: 9 GB VRAM",
+        "Full Model: 70 GB VRAM, 4-bit: 25 GB VRAM",
+        "Full Model: 33 GB VRAM, 4-bit: 15 GB VRAM",
+        "Full Model: 20 GB VRAM, 4-bit: 10 GB VRAM",
+    ]
+    captions_llava_bit = ["Full Model", "4-bit Quantization",]
+    # Get the current OCR option from session state
+    OCR_option = st.session_state.config['leafmachine']['project']['OCR_option']
+    OCR_option_llava = st.session_state.config['leafmachine']['project']['OCR_option_llava']
+    OCR_option_llava_bit = st.session_state.config['leafmachine']['project']['OCR_option_llava_bit']
+    double_OCR = st.session_state.config['leafmachine']['project']['double_OCR']
+
+    # Map the OCR option to the index in the options list
+    default_index = 0  # Default to 0 if option not found
+    default_index_llava = 0  # Default to 0 if option not found
+    default_index_llava_bit = 0
+    with c1:
+        st.subheader("API Methods (Google Vision)")
+        st.write("Using APIs for OCR allows VoucherVision to run on most computers.")
+
+        st.session_state.config['leafmachine']['project']['double_OCR'] = st.checkbox(label="Send 2 copies of the OCR to the LLM",
+                                                                                      help="This can help the LLMs focus attention on the OCR and not get lost in the longer instruction text",
+                                                                                      value=double_OCR)
+
+        # Create the radio button
+        # OCR_option_select = st.radio(
+        #     "Select the OCR Method",
+        #     options,
+        #     index=default_index,
+        #     help="", captions=captions,
+        # )
+        default_values = [options[default_index]]
+        OCR_option_select = st.multiselect(
+            "Select the OCR Method(s)",
+            options=options,
+            default=default_values,
+            help="Select one or more OCR methods."
+        )
+        # st.session_state.config['leafmachine']['project']['OCR_option'] = OCR_option_select
+
+        # Handling multiple selections (Example logic)
+        OCR_options = {
+            "Google Vision Handwritten": 'hand',
+            "Google Vision Printed": 'normal',
+            "CRAFT + trOCR": 'CRAFT',
+            "LLaVA": 'LLaVA',
+        }
+
+        # Map selected options to their corresponding internal representations
+        selected_OCR_options = [OCR_options[option] for option in OCR_option_select]
+
+        # Assuming you need to use these mapped values elsewhere in your application
+        st.session_state.config['leafmachine']['project']['OCR_option'] = selected_OCR_options
+
+
+    with c2:
+        st.subheader("Local Methods")
+        st.write("Local methods are free, but require a capable GPU.")
+
+        st.write("Supplement Google Vision OCR with trOCR (handwriting OCR) using `microsoft/trocr-base-handwritten`. This option requires the Google Vision API and a GPU.")
+        if 'CRAFT' in selected_OCR_options:
+            do_use_trOCR = st.checkbox("Enable trOCR", value=True, key="Enable trOCR1", disabled=True)  #,disabled=st.session_state['lacks_GPU'])
+        else:
+            do_use_trOCR = st.checkbox("Enable trOCR", value=st.session_state.config['leafmachine']['project']['do_use_trOCR'], key="Enable trOCR2")  #,disabled=st.session_state['lacks_GPU'])
+        st.session_state.config['leafmachine']['project']['do_use_trOCR'] = do_use_trOCR
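The `microsoft/trocr-base-handwritten` checkpoint named above is a standard Hugging Face model; a self-contained sketch of running it on one segmented word image (how VoucherVision wires it to the Google Vision word crops is not shown here):

    from PIL import Image
    from transformers import TrOCRProcessor, VisionEncoderDecoderModel

    processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
    model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")

    image = Image.open("word_crop.jpg").convert("RGB")   # one segmented word image
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    print(text)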
+
+        if 'LLaVA' in selected_OCR_options:
+            OCR_option_llava = st.radio(
+                "Select the LLaVA version",
+                options_llava,
+                index=default_index_llava,
+                help="", captions=captions_llava,
+            )
+            st.session_state.config['leafmachine']['project']['OCR_option_llava'] = OCR_option_llava
+
+            OCR_option_llava_bit = st.radio(
+                "Select the LLaVA quantization level",
+                options_llava_bit,
+                index=default_index_llava_bit,
+                help="", captions=captions_llava_bit,
+            )
+            st.session_state.config['leafmachine']['project']['OCR_option_llava_bit'] = OCR_option_llava_bit
+
+
+    # st.markdown("Below is an example of what the LLM would see given the choice of OCR ensemble. One, two, or three versions of OCR can be fed into the LLM prompt. Typically, 'printed + handwritten' works well. If you have a GPU then you can enable trOCR.")
+    # if (OCR_option == 'hand') and not do_use_trOCR:
+    #     st.text_area(label='Handwritten/Printed', placeholder=demo_text_h, disabled=True, label_visibility='visible', height=150)
+    # elif (OCR_option == 'normal') and not do_use_trOCR:
+    #     st.text_area(label='Printed', placeholder=demo_text_p, disabled=True, label_visibility='visible', height=150)
+    # elif (OCR_option == 'both') and not do_use_trOCR:
+    #     st.text_area(label='Handwritten/Printed + Printed', placeholder=demo_text_b, disabled=True, label_visibility='visible', height=150)
+    # elif (OCR_option == 'both') and do_use_trOCR:
+    #     st.text_area(label='Handwritten/Printed + Printed + trOCR', placeholder=demo_text_trb, disabled=True, label_visibility='visible', height=150)
+    # elif (OCR_option == 'normal') and do_use_trOCR:
+    #     st.text_area(label='Printed + trOCR', placeholder=demo_text_trp, disabled=True, label_visibility='visible', height=150)
+    # elif (OCR_option == 'hand') and do_use_trOCR:
+    #     st.text_area(label='Handwritten/Printed + trOCR', placeholder=demo_text_trh, disabled=True, label_visibility='visible', height=150)
+
+def content_collage_overlay():
+    st.write("---")
+    col_collage, col_overlay = st.columns([4,4])
+
+
     with col_collage:
         st.header('LeafMachine2 Label Collage')
+        st.info("NOTE: We strongly recommend enabling LeafMachine2 cropping if your images are full-sized herbarium sheets. Often, the OCR algorithm struggles with full sheets but works well with the collage images. We have disabled the collage by default for this Hugging Face Space because the Space lacks a GPU and the collage creation takes a bit longer.")
         default_crops = st.session_state.config['leafmachine']['cropped_components']['save_cropped_annotations']
         st.write("Prior to transcription, use LeafMachine2 to crop all labels from input images to create label collages for each specimen image. Showing just the text labels to the OCR algorithms significantly improves performance. This runs slowly on the free Hugging Face Space, but runs quickly with a fast CPU or any GPU.")
+        st.session_state.config['leafmachine']['use_RGB_label_images'] = st.checkbox(":rainbow[Use LeafMachine2 label collage for transcriptions]", st.session_state.config['leafmachine'].get('use_RGB_label_images', False))
 
         option_selected_crops = st.multiselect(label="Components to crop",
 
         with st.expander(":frame_with_picture: View an example of the LeafMachine2 collage image"):
             st.image(st.session_state["demo_collage"], caption='LeafMachine2 Collage', output_format="PNG")
             # st.image(st.session_state["demo_collage"], caption='LeafMachine2 Collage', output_format="JPEG")
 
     with col_overlay:
         st.header('OCR Overlay Image')
 
         st.write('This will plot bounding boxes around all text that Google Vision was able to detect. If there are no boxes around text, then the OCR failed, so that missing text will not be seen by the LLM when it is creating the JSON object. The created image will be viewable in the VoucherVisionEditor.')
 
         do_create_OCR_helper_image = st.checkbox("Create image showing an overlay of the OCR detections", value=st.session_state.config['leafmachine']['do_create_OCR_helper_image'], disabled=True)
         st.session_state.config['leafmachine']['do_create_OCR_helper_image'] = do_create_OCR_helper_image
 
     if "demo_overlay" not in st.session_state:
         # ocr = os.path.join(st.session_state.dir_home,'demo', 'ba','ocr.png')
 
     st.subheader('Compute Options')
     st.session_state.config['leafmachine']['project']['num_workers'] = st.number_input("Number of CPU workers", value=st.session_state.config['leafmachine']['project'].get('num_workers', 1), disabled=False)
     st.session_state.config['leafmachine']['project']['batch_size'] = st.number_input("Batch size", value=st.session_state.config['leafmachine']['project'].get('batch_size', 500), help='Sets the batch size for the LeafMachine2 cropping. If computer RAM is filled, lower this value to ~100.')
+    st.session_state.config['leafmachine']['project']['pdf_conversion_dpi'] = st.number_input("PDF conversion DPI", value=st.session_state.config['leafmachine']['project'].get('pdf_conversion_dpi', 100), help='DPI of the JPG created from the page of a PDF. 100 should be fine for most cases, but 200 or 300 might be better for large images.')
+
     with col_processing_2:
         st.subheader('Filename Prefix Handling')
         st.session_state.config['leafmachine']['project']['prefix_removal'] = st.text_input("Remove prefix from catalog number", st.session_state.config['leafmachine']['project'].get('prefix_removal', ''), placeholder="e.g. MICH-V-")
 
     ### Logging and Image Validation - col_v1
     st.write("---")
     col_v1, col_v2 = st.columns(2)
+
     with col_v1:
+        st.header('Logging and Image Validation')
         option_check_illegal = st.checkbox("Check for illegal filenames", value=st.session_state.config['leafmachine']['do']['check_for_illegal_filenames'])
         st.session_state.config['leafmachine']['do']['check_for_illegal_filenames'] = option_check_illegal
+
+        option_skip_vertical = st.checkbox("Skip vertical image requirement (e.g. horizontal PDFs)", value=st.session_state.config['leafmachine']['do']['skip_vertical'], help='The LeafMachine2 label collage requires images to have vertical aspect ratios for stability. If your input images have a horizontal aspect ratio, try skipping the vertical requirement first, look for strange behavior, and then reassess. If your images/PDFs are already closeups and you do not need the collage, then skipping the vertical requirement is the right choice.')
+        st.session_state.config['leafmachine']['do']['skip_vertical'] = option_skip_vertical
+
         st.session_state.config['leafmachine']['do']['check_for_corrupt_images_make_vertical'] = st.checkbox("Check for corrupt images", st.session_state.config['leafmachine']['do'].get('check_for_corrupt_images_make_vertical', True), disabled=True)
 
         st.session_state.config['leafmachine']['print']['verbose'] = st.checkbox("Print verbose", st.session_state.config['leafmachine']['print'].get('verbose', True))
         st.session_state.config['leafmachine']['print']['optional_warnings'] = st.checkbox("Show optional warnings", st.session_state.config['leafmachine']['print'].get('optional_warnings', True))
 
         log_level = st.session_state.config['leafmachine']['logging'].get('log_level', None)
         log_level_display = log_level if log_level is not None else 'default'
         selected_log_level = st.selectbox("Logging Level", ['default', 'DEBUG', 'INFO', 'WARNING', 'ERROR'], index=['default', 'DEBUG', 'INFO', 'WARNING', 'ERROR'].index(log_level_display))
 
     else:
         st.session_state.config['leafmachine']['logging']['log_level'] = selected_log_level
 
+    with col_v2:
+        print(f"Number of GPUs: {st.session_state.num_gpus}")
+        print(f"GPU Details: {st.session_state.gpu_dict}")
+        print(f"Total VRAM: {st.session_state.total_vram_gb} GB")
+        print(f"Capability Score: {st.session_state.capability_score}")
+
+        st.header('System GPU Information')
+        st.markdown(f"**Torch CUDA:** {torch.cuda.is_available()}")
+        st.markdown(f"**Number of GPUs:** {st.session_state.num_gpus}")
+
+        if st.session_state.num_gpus > 0:
+            st.markdown("**GPU Details:**")
+            for gpu_id, vram in st.session_state.gpu_dict.items():
+                st.text(f"{gpu_id}: {vram}")
+
+            st.markdown(f"**Total VRAM:** {st.session_state.total_vram_gb} GB")
+            st.markdown(f"**Capability Score:** {st.session_state.capability_score}")
+        else:
+            st.warning("No GPUs detected in the system.")
+
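`check_system_gpus()` lives in `vouchervision.utils_LLM`; a rough sketch of such a probe using GPUtil (installed by `install_dependencies.sh`). The return shape mirrors the four values unpacked at startup, but the implementation and the score format are assumptions:

    import GPUtil

    def check_system_gpus_sketch():
        gpus = GPUtil.getGPUs()  # empty list when no NVIDIA GPU is visible
        gpu_dict = {f"GPU {g.id}": f"{g.memoryTotal / 1024:.0f} GB" for g in gpus}
        total_vram_gb = sum(g.memoryTotal for g in gpus) / 1024
        capability_score = 'no_gpu' if not gpus else f"class_{int(total_vram_gb)}GB"
        return len(gpus), gpu_dict, total_vram_gb, capability_score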
1934
 
1935
  def content_tab_domain():
 
1996
  expense_report = st.session_state.expense_report
1997
  st.header('Expense Report Summary')
1998
 
1999
+ if not expense_summary:
2000
+ st.warning('No expense report data available.')
2001
+ else:
2002
  st.metric(label="Total Cost", value=f"${round(expense_summary['total_cost_sum'], 4):,}")
2003
  col1, col2 = st.columns(2)
2004
 
 
2092
  pie_chart.update_traces(marker=dict(colors=colors),)
2093
  st.plotly_chart(pie_chart, use_container_width=True)
2094
 
 
 
 
 
2095
 
2096
  def content_less_used():
2097
  st.write('---')
2098
  st.write(':octagonal_sign: ***NOTE:*** Settings below are not relevant for most projects. Some settings below may not be reflected in saved settings files and would need to be set each time.')
2099
 
2100
+
2101
  #################################################################################################################################################
2102
  # Sidebar #######################################################################################################################################
2103
  #################################################################################################################################################
2104
  def sidebar_content():
2105
+ # st.page_link(os.path.join(os.path.dirname(__file__),'app.py'), label="Home", icon="🏠")
2106
+ # st.page_link(os.path.join(os.path.dirname(__file__),"pages","prompt_builder.py"), label="Prompt Builder", icon="🚧")
2107
+ # st.page_link("pages/page_2.py", label="Page 2", icon="2️⃣", disabled=True)
2108
+ # st.page_link("http://www.google.com", label="Google", icon="🌎")
2109
+
2110
  if not os.path.exists(os.path.join(st.session_state.dir_home,'expense_report')):
2111
  validate_dir(os.path.join(st.session_state.dir_home,'expense_report'))
2112
  expense_report_path = os.path.join(st.session_state.dir_home, 'expense_report', 'expense_report.csv')
 
2123
  st.write('Available after first run...')
2124
 
2125
 
 
2126
  #################################################################################################################################################
2127
  # Routing Function ##############################################################################################################################
2128
  #################################################################################################################################################
 
2132
  sidebar_content()
2133
  # Main App
2134
  content_header()
 
2135
 
2136
  col_input, col_gallery = st.columns([4,8])
2137
  content_project_settings(col_input)
2138
  content_input_images(col_input, col_gallery)
2139
 
 
 
 
 
 
 
 
 
 
 
2140
 
2141
  col3, col4 = st.columns([1,1])
2142
  with col3:
2143
  content_prompt_and_llm_version()
2144
  with col4:
2145
  content_api_check()
2146
+
2147
+ content_ocr_method()
2148
+
2149
  content_collage_overlay()
2150
  content_llm_cost()
2151
  content_processing_options()
 
2155
  content_space_saver()
2156
 
 #################################################################################################################################################
 # Main ##########################################################################################################################################
 #################################################################################################################################################
 if st.session_state['is_hf']:
+    # if st.session_state.proceed_to_build_llm_prompt:
+    #     build_LLM_prompt_config()
+    if st.session_state.proceed_to_main:
         main()
+
 else:
     if not st.session_state.private_file:
         create_private_file()
+    # elif st.session_state.proceed_to_build_llm_prompt:
+    #     build_LLM_prompt_config()
     elif st.session_state.proceed_to_private and not st.session_state['is_hf']:
         create_private_file()
     elif st.session_state.proceed_to_main:
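With the new `pages/` directory, Streamlit discovers the extra pages automatically; a sketch of the expected layout (names taken from the page links and the files added below; `report_bugs.py` is referenced but not shown in this commit view):

    VoucherVision/
    ├── app.py                 # main entry point: `streamlit run app.py`
    ├── pages/
    │   ├── faqs.py
    │   ├── prompt_builder.py
    │   └── report_bugs.py
    └── vouchervision/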
install_dependencies.sh ADDED
@@ -0,0 +1,85 @@
+#!/bin/bash
+
+# List of packages to be installed
+packages=(
+    wheel
+    gputil
+    streamlit
+    streamlit-extras
+    streamlit-elements==0.1.*
+    plotly
+    google-api-python-client
+    wikipedia
+    PyMuPDF
+    craft-text-detector
+    pyyaml
+    Pillow
+    bitsandbytes
+    accelerate
+    mapboxgl
+    pandas
+    matplotlib
+    matplotlib-inline
+    tqdm
+    openai
+    langchain
+    langchain-community
+    langchain-core
+    langchain_mistralai
+    langchain_openai
+    langchain_google_genai
+    langchain_experimental
+    jsonformer
+    vertexai
+    ctransformers
+    google-cloud-aiplatform
+    tiktoken
+    llama-cpp-python
+    openpyxl
+    google-generativeai
+    google-cloud-storage
+    google-cloud-vision
+    opencv-python
+    chromadb
+    chroma-migrate
+    InstructorEmbedding
+    transformers
+    sentence-transformers
+    seaborn
+    dask
+    psutil
+    py-cpuinfo
+    Levenshtein
+    fuzzywuzzy
+    opencage
+    geocoder
+    pycountry_convert
+)
+
+# Function to install a single package
+install_package() {
+    package=$1
+    echo "Installing $package..."
+    pip3 install "$package"
+    if [ $? -ne 0 ]; then
+        echo "Failed to install $package"
+        exit 1
+    fi
+}
+
+# Install each package individually
+for package in "${packages[@]}"; do
+    install_package "$package"
+done
+
+echo "All packages installed successfully."
+echo "Cloning and installing LLaVA..."
+
+cd vouchervision
+git clone https://github.com/haotian-liu/LLaVA.git
+cd LLaVA  # Assuming you want to run pip install in the LLaVA directory
+pip install -e .
+git pull
+pip install -e .
+echo "LLaVA ready"
pages/faqs.py ADDED
@@ -0,0 +1,38 @@
+import os
+import streamlit as st
+import streamlit.components.v1 as components
+
+st.set_page_config(layout="wide", page_icon='img/icon.ico', page_title='VV FAQs', initial_sidebar_state="collapsed")
+
+def display_faqs():
+    c1, c2, c3 = st.columns([4,6,1])
+    with c3:
+        # st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),'app.py'), label="Home", icon="🏠")
+        # st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),"pages","faqs.py"), label="FAQs", icon="❔")
+        # st.page_link(os.path.join(os.path.dirname(os.path.dirname(__file__)),"pages","report_bugs.py"), label="Report a Bug", icon="⚠️")
+        st.page_link('app.py', label="Home", icon="🏠")
+        st.page_link(os.path.join("pages","faqs.py"), label="FAQs", icon="❔")
+        st.page_link(os.path.join("pages","report_bugs.py"), label="Report a Bug", icon="⚠️")
+    with c2:
+        st.write('If you would like to get more involved, have questions, or would like to see additional features, then please fill out this [Google Form](https://docs.google.com/forms/d/e/1FAIpQLSe2E9zU1bPJ1BW4PMakEQFsRmLbQ0WTBI2UXHIMEFm4WbnAVw/viewform?usp=sf_link)')
+        components.iframe(f"https://docs.google.com/forms/d/e/1FAIpQLSe2E9zU1bPJ1BW4PMakEQFsRmLbQ0WTBI2UXHIMEFm4WbnAVw/viewform?embedded=true", height=900, scrolling=True, width=640)
+
+    with c1:
+        st.header('FAQs')
+        st.subheader('Lead Institution')
+        st.write('- University of Michigan')
+
+        st.subheader('Partner Institutions')
+        st.write('- Oregon State University')
+        st.write('- University of Colorado Boulder')
+        st.write('- Botanical Research Institute of Texas')
+        st.write('- Smithsonian National Museum of Natural History')
+        st.write('- South African National Biodiversity Institute')
+        st.write('- Botanischer Garten Berlin')
+        st.write('- Freie Universität Berlin')
+        st.write('- Morton Arboretum')
+        st.write('- Florida Museum')
+        st.write('- iDigBio')
+        st.write('**More soon!**')
+
+display_faqs()
pages/prompt_builder.py ADDED
@@ -0,0 +1,478 @@
+import os, yaml
+import streamlit as st
+from PIL import Image
+from itertools import chain
+
+from vouchervision.model_maps import ModelMaps
+from vouchervision.utils_hf import check_prompt_yaml_filename
+
+st.set_page_config(layout="wide", page_icon='img/icon.ico', page_title='VV Prompt Builder', initial_sidebar_state="collapsed")
+
+def create_download_button_yaml(file_path, selected_yaml_file, key_val):
+    file_label = f"Download {selected_yaml_file}"
+    with open(file_path, 'rb') as f:
+        st.download_button(
+            label=file_label,
+            data=f,
+            file_name=os.path.basename(file_path),
+            mime='application/x-yaml', use_container_width=True, key=key_val,
+        )
+
+
+def upload_local_prompt_to_server(dir_prompt):
+    uploaded_file = st.file_uploader("Upload a custom prompt file", type=['yaml'])
+    if uploaded_file is not None:
+        # Check the file extension
+        file_name = uploaded_file.name
+        if file_name.endswith('.yaml'):
+            file_path = os.path.join(dir_prompt, file_name)
+
+            # Save the file
+            with open(file_path, 'wb') as f:
+                f.write(uploaded_file.getbuffer())
+            st.success(f"Saved file {file_name} in {dir_prompt}")
+        else:
+            st.error("Please upload a .yaml file that you previously created using this Prompt Builder tool.")
+
+
+def save_prompt_yaml(filename, col):
+    yaml_content = {
+        'prompt_author': st.session_state['prompt_author'],
+        'prompt_author_institution': st.session_state['prompt_author_institution'],
+        'prompt_name': st.session_state['prompt_name'],
+        'prompt_version': st.session_state['prompt_version'],
+        'prompt_description': st.session_state['prompt_description'],
+        'LLM': st.session_state['LLM'],
+        'instructions': st.session_state['instructions'],
+        'json_formatting_instructions': st.session_state['json_formatting_instructions'],
+        'rules': st.session_state['rules'],
+        'mapping': st.session_state['mapping'],
+    }
+
+    dir_prompt = os.path.join(st.session_state.dir_home, 'custom_prompts')
+    filepath = os.path.join(dir_prompt, f"{filename}.yaml")
+
+    with open(filepath, 'w') as file:
+        yaml.safe_dump(dict(yaml_content), file, sort_keys=False)
+
+    st.success(f"Prompt saved as '{filename}.yaml'.")
+
+    with col:  # added
+        create_download_button_yaml(filepath, filename, key_val=2456237465)  # added
+
+
+def load_prompt_yaml(filename):
+    st.session_state['user_clicked_load_prompt_yaml'] = filename
+    with open(filename, 'r') as file:
+        st.session_state['prompt_info'] = yaml.safe_load(file)
+    st.session_state['prompt_author'] = st.session_state['prompt_info'].get('prompt_author', st.session_state['default_prompt_author'])
+    st.session_state['prompt_author_institution'] = st.session_state['prompt_info'].get('prompt_author_institution', st.session_state['default_prompt_author_institution'])
+    st.session_state['prompt_name'] = st.session_state['prompt_info'].get('prompt_name', st.session_state['default_prompt_name'])
+    st.session_state['prompt_version'] = st.session_state['prompt_info'].get('prompt_version', st.session_state['default_prompt_version'])
+    st.session_state['prompt_description'] = st.session_state['prompt_info'].get('prompt_description', st.session_state['default_prompt_description'])
+    st.session_state['instructions'] = st.session_state['prompt_info'].get('instructions', st.session_state['default_instructions'])
+    st.session_state['json_formatting_instructions'] = st.session_state['prompt_info'].get('json_formatting_instructions', st.session_state['default_json_formatting_instructions'])
+    st.session_state['rules'] = st.session_state['prompt_info'].get('rules', {})
+    st.session_state['mapping'] = st.session_state['prompt_info'].get('mapping', {})
+    st.session_state['LLM'] = st.session_state['prompt_info'].get('LLM', 'General Purpose')
+
+    # Placeholder:
+    st.session_state['assigned_columns'] = list(chain.from_iterable(st.session_state['mapping'].values()))
+
+
+def btn_load_prompt(selected_yaml_file, dir_prompt):
+    if selected_yaml_file:
+        yaml_file_path = os.path.join(dir_prompt, selected_yaml_file)
+        load_prompt_yaml(yaml_file_path)
+    elif not selected_yaml_file:
+        # Directly assigning default values since no file is selected
+        st.session_state['prompt_info'] = {}
+        st.session_state['prompt_author'] = st.session_state['default_prompt_author']
+        st.session_state['prompt_author_institution'] = st.session_state['default_prompt_author_institution']
+        st.session_state['prompt_name'] = st.session_state['prompt_name']
+        st.session_state['prompt_version'] = st.session_state['prompt_version']
+        st.session_state['prompt_description'] = st.session_state['default_prompt_description']
+        st.session_state['instructions'] = st.session_state['default_instructions']
+        st.session_state['json_formatting_instructions'] = st.session_state['default_json_formatting_instructions']
+        st.session_state['rules'] = {}
+        st.session_state['LLM'] = 'General Purpose'
+
+        st.session_state['assigned_columns'] = []
+
+        st.session_state['prompt_info'] = {
+            'prompt_author': st.session_state['prompt_author'],
+            'prompt_author_institution': st.session_state['prompt_author_institution'],
+            'prompt_name': st.session_state['prompt_name'],
+            'prompt_version': st.session_state['prompt_version'],
+            'prompt_description': st.session_state['prompt_description'],
+            'instructions': st.session_state['instructions'],
+            'json_formatting_instructions': st.session_state['json_formatting_instructions'],
+            'rules': st.session_state['rules'],
+            'mapping': st.session_state['mapping'],
+            'LLM': st.session_state['LLM']
+        }
+
+
+def check_unique_mapping_assignments():
+    print(st.session_state['assigned_columns'])
+    if len(st.session_state['assigned_columns']) != len(set(st.session_state['assigned_columns'])):
+        st.error("Each column name must be assigned to only one category.")
+        return False
+    elif not st.session_state['assigned_columns']:
+        st.error("No columns have been mapped.")
+        return False
+    elif len(st.session_state['assigned_columns']) != len(st.session_state['rules'].keys()):
+        incomplete = [item for item in list(st.session_state['rules'].keys()) if item not in st.session_state['assigned_columns']]
+        st.warning(f"These columns have been mapped: {st.session_state['assigned_columns']}")
+        st.error(f"However, these columns must be mapped before the prompt is complete: {incomplete}")
+        return False
+    else:
+        st.success("Mapping confirmed.")
+        return True
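An illustrative round trip through the mapping check (hypothetical values): `mapping` groups output columns by category, and flattening it exposes duplicate assignments:

    from itertools import chain

    mapping = {"TAXONOMY": ["order", "family"], "GEOGRAPHY": ["country", "stateProvince"]}
    assigned_columns = list(chain.from_iterable(mapping.values()))
    # ['order', 'family', 'country', 'stateProvince'] -- all unique, so the uniqueness check passes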
+
+
+def build_LLM_prompt_config():
+    col_main1, col_main2 = st.columns([10,2])
+    with col_main1:
+        st.session_state.logo_path = os.path.join(st.session_state.dir_home, 'img','logo.png')
+        st.session_state.logo = Image.open(st.session_state.logo_path)
+        st.image(st.session_state.logo, width=250)
+    with col_main2:
+        st.page_link('app.py', label="Home", icon="🏠")
+        st.page_link(os.path.join("pages","faqs.py"), label="FAQs", icon="❔")
+        st.page_link(os.path.join("pages","report_bugs.py"), label="Report a Bug", icon="⚠️")
+        # st.page_link("pages/page_2.py", label="Page 2", icon="2️⃣", disabled=True)
+        # st.page_link("http://www.google.com", label="Google", icon="🌎")
+
+    st.session_state['assigned_columns'] = []
+    st.session_state['default_prompt_author'] = 'unknown'
+    st.session_state['default_prompt_author_institution'] = 'unknown'
+    st.session_state['default_prompt_name'] = 'custom_prompt'
+    st.session_state['default_prompt_version'] = 'v-1-0'
+    st.session_state['default_prompt_description'] = 'unknown'
+    st.session_state['default_LLM'] = 'General Purpose'
+    st.session_state['default_instructions'] = """1. Refactor the unstructured OCR text into a dictionary based on the JSON structure outlined below.
+2. Map the unstructured OCR text to the appropriate JSON key and populate the field given the user-defined rules.
+3. JSON key values are permitted to remain empty strings if the corresponding information is not found in the unstructured OCR text.
+4. Duplicate dictionary fields are not allowed.
+5. Ensure all JSON keys are in camel case.
+6. Ensure new JSON field values follow sentence case capitalization.
+7. Ensure all key-value pairs in the JSON dictionary strictly adhere to the format and data types specified in the template.
+8. Ensure output JSON string is valid JSON format. It should not have trailing commas or unquoted keys.
+9. Only return a JSON dictionary represented as a string. You should not explain your answer."""
+    st.session_state['default_json_formatting_instructions'] = """This section provides rules for formatting each JSON value organized by the JSON key."""
+
166
+ # Start building the Streamlit app
167
+ col_prompt_main_left, ___, col_prompt_main_right = st.columns([6,1,3])
168
+
169
+
170
+ with col_prompt_main_left:
171
+
172
+ st.title("Custom LLM Prompt Builder")
173
+ st.subheader('About')
174
+ st.write("This form allows you to craft a prompt for your specific task. You can also edit the JSON yaml files directly, but please try loading the prompt back into this form to ensure that the formatting is correct. If this form cannot load your manually edited JSON yaml file, then it will not work in VoucherVision.")
175
+ st.subheader(':rainbow[How it Works]')
176
+ st.write("1. Edit this page until you are happy with your instructions. We recommend looking at the basic structure, writing down your prompt inforamtion in a Word document so that it does not randomly disappear, and then copying and pasting that info into this form once your whole prompt structure is defined.")
177
+ st.write("2. After you enter all of your prompt instructions, click 'Save' and give your file a name.")
178
+ st.write("3. This file will be saved as a yaml configuration file in the `..VoucherVision/custom_prompts` folder.")
179
+ st.write("4. When you go back the main VoucherVision page you will now see your custom prompt available in the 'Prompt Version' dropdown menu.")
180
+ st.write("5. The LLM ***only*** sees information from the 'instructions', 'rules', and 'json_formatting_instructions' sections. All other information is for versioning and integration with VoucherVisionEditor.")
181
+
182
+ st.write("---")
183
+ st.header('Load an Existing Prompt Template')
184
+ st.write("By default, this form loads the minimum required transcription fields but does not provide rules for each field. You can also load an existing prompt as a template, editing or deleting values as needed.")
185
+
186
+ dir_prompt = os.path.join(st.session_state.dir_home, 'custom_prompts')
187
+ yaml_files = [f for f in os.listdir(dir_prompt) if f.endswith('.yaml')]
188
+ col_load_text, col_load_btn, col_load_btn2 = st.columns([8,2,2])
189
+ with col_load_text:
190
+ # Dropdown for selecting a YAML file
191
+ st.session_state['selected_yaml_file'] = st.selectbox('Select a prompt .YAML file to load:', [''] + yaml_files)
192
+ with col_load_btn:
193
+ st.write('##')
194
+ # Button to load the selected prompt
195
+ st.button('Load Prompt', on_click=btn_load_prompt, args=[st.session_state['selected_yaml_file'], dir_prompt],use_container_width=True)
196
+
197
+ with col_load_btn2:
198
+ if st.session_state['selected_yaml_file']:
199
+ # Construct the full path to the file
200
+ download_file_path = os.path.join(dir_prompt, st.session_state['selected_yaml_file'] )
201
+ # Create the download button
202
+ st.write('##')
203
+ create_download_button_yaml(download_file_path, st.session_state['selected_yaml_file'],key_val=345798)
204
+
205
+ # Prompt Author Information
206
+ st.write("---")
207
+ st.header("Prompt Author Information")
208
+ st.write("We value community contributions! Please provide your name(s) (or pseudonym if you prefer) for credit. If you leave this field blank, it will say 'unknown'.")
209
+ if 'prompt_author' not in st.session_state:# != st.session_state['default_prompt_author']:
210
+ st.session_state['prompt_author'] = st.text_input("Enter names of prompt author(s)", value=st.session_state['default_prompt_author'],key=1111)
211
+ else:
212
+ st.session_state['prompt_author'] = st.text_input("Enter names of prompt author(s)", value=st.session_state['prompt_author'],key=1112)
213
+
214
+ # Institution
215
+ st.write("Please provide your institution name. If you leave this field blank, it will say 'unknown'.")
216
+ if 'prompt_author_institution' not in st.session_state:
217
+ st.session_state['prompt_author_institution'] = st.text_input("Enter name of institution", value=st.session_state['default_prompt_author_institution'],key=1113)
218
+ else:
219
+ st.session_state['prompt_author_institution'] = st.text_input("Enter name of institution", value=st.session_state['prompt_author_institution'],key=1114)
220
+
221
+ # Prompt name
222
+ st.write("Please provide a simple name for your prompt. If you leave this field blank, it will say 'custom_prompt'.")
223
+ if 'prompt_name' not in st.session_state:
224
+ st.session_state['prompt_name'] = st.text_input("Enter prompt name", value=st.session_state['default_prompt_name'],key=1115)
225
+ else:
226
+ st.session_state['prompt_name'] = st.text_input("Enter prompt name", value=st.session_state['prompt_name'],key=1116)
227
+
228
+ # Prompt verion
229
+ st.write("Please provide a version identifier for your prompt. If you leave this field blank, it will say 'v-1-0'.")
230
+ if 'prompt_version' not in st.session_state:
231
+ st.session_state['prompt_version'] = st.text_input("Enter prompt version", value=st.session_state['default_prompt_version'],key=1117)
232
+ else:
233
+ st.session_state['prompt_version'] = st.text_input("Enter prompt version", value=st.session_state['prompt_version'],key=1118)
234
+
235
+
236
+ st.write("Please provide a description of your prompt and its intended task. Is it designed for a specific collection? Taxa? Database structure?")
237
+ if 'prompt_description' not in st.session_state:
238
+ st.session_state['prompt_description'] = st.text_input("Enter description of prompt", value=st.session_state['default_prompt_description'],key=1119)
239
+ else:
240
+ st.session_state['prompt_description'] = st.text_input("Enter description of prompt", value=st.session_state['prompt_description'],key=11111)
241
+
242
+ st.write('---')
243
+ st.header("Set LLM Model Type")
244
+ # Define the options for the dropdown
245
+ llm_options_general = ["General Purpose",
246
+ "OpenAI GPT Models","Google PaLM2 Models","Google Gemini Models","MistralAI Models",]
247
+ llm_options_all = ModelMaps.get_models_gui_list()
248
+
249
+ if 'LLM' not in st.session_state:
250
+ st.session_state['LLM'] = st.session_state['default_LLM']
251
+
252
+ if st.session_state['LLM']:
253
+ llm_options = llm_options_general + llm_options_all + [st.session_state['LLM']]
254
+ else:
255
+ llm_options = llm_options_general + llm_options_all
256
+ # Create the dropdown and set the value to session_state['LLM']
257
+ st.write("Which LLM is this prompt designed for? This will not restrict its use to a specific LLM, but some prompts will behave differently across models.")
258
+ st.write("SLTPvA prompts have been validated with all supported LLMs, but perfornce may vary. If you design a prompt to work best with a specific model, then you can indicate the model here.")
259
+ st.write("For general purpose prompts (like the SLTPvA prompts) just use the 'General Purpose' option.")
260
+ st.session_state['LLM'] = st.selectbox('Set LLM', llm_options, index=llm_options.index(st.session_state.get('LLM', 'General Purpose')))
261
+
262
+ st.write('---')
263
+ # Instructions Section
264
+ st.header("Instructions")
265
+ st.write("These are the general instructions that guide the LLM through the transcription task. We recommend using the default instructions unless you have a specific reason to change them.")
266
+
267
+ if 'instructions' not in st.session_state:
268
+ st.session_state['instructions'] = st.text_area("Enter guiding instructions", value=st.session_state['default_instructions'].strip(), height=350,key=111112)
269
+ else:
270
+ st.session_state['instructions'] = st.text_area("Enter guiding instructions", value=st.session_state['instructions'].strip(), height=350,key=111112)
271
+
272
+
273
+ st.write('---')
274
+
275
+ # Column Instructions Section
276
+ st.header("JSON Formatting Instructions")
277
+ st.write("The following section tells the LLM how we want to structure the JSON dictionary. We do not recommend changing this section because it would likely result in unstable and inconsistent behavior.")
278
+ if 'json_formatting_instructions' not in st.session_state:
279
+ st.session_state['json_formatting_instructions'] = st.text_area("Enter general JSON guidelines", value=st.session_state['default_json_formatting_instructions'],key=111114)
280
+ else:
281
+ st.session_state['json_formatting_instructions'] = st.text_area("Enter general JSON guidelines", value=st.session_state['json_formatting_instructions'],key=111115)
282
+
283
+
284
+
285
+
286
+
287
+
288
+ st.write('---')
289
+ col_left, col_right = st.columns([6,4])
290
+
291
+ null_value_rules = ''
292
+ c_name = "EXAMPLE_COLUMN_NAME"
293
+ c_value = "REPLACE WITH DESCRIPTION"
294
+
295
+ with col_left:
296
+ st.subheader('Add/Edit Columns')
297
+ st.markdown("The pre-populated fields are REQUIRED for downstream validation steps. They must be in all prompts.")
298
+
299
+ # Initialize rules in session state if not already present
300
+ if 'rules' not in st.session_state or not st.session_state['rules']:
301
+ for required_col in st.session_state['required_fields']:
302
+ st.session_state['rules'][required_col] = c_value
303
+
304
+
305
+
306
+
307
+ # Layout for adding a new column name
308
+ # col_text, col_textbtn = st.columns([8, 2])
309
+ # with col_text:
310
+ st.session_state['new_column_name'] = st.text_input("Enter a new column name:")
311
+ # with col_textbtn:
312
+ # st.write('##')
313
+ if st.button("Add New Column") and st.session_state['new_column_name']:
314
+ if st.session_state['new_column_name'] not in st.session_state['rules']:
315
+ st.session_state['rules'][st.session_state['new_column_name']] = c_value
316
+ st.success(f"New column '{st.session_state['new_column_name']}' added. Now you can edit its properties.")
317
+ st.session_state['new_column_name'] = ''
318
+ else:
319
+ st.error("Column name already exists. Please enter a unique column name.")
320
+ st.session_state['new_column_name'] = ''
321
+
322
+ # Get columns excluding the protected "catalogNumber"
323
+ st.write('#')
324
+ # required_columns = [col for col in st.session_state['rules'] if col not in st.session_state['required_fields']]
325
+ editable_columns = [col for col in st.session_state['rules'] if col not in ["catalogNumber"]]
326
+ removable_columns = [col for col in st.session_state['rules'] if col not in st.session_state['required_fields']]
327
+
328
+ st.session_state['current_rule'] = st.selectbox("Select a column to edit:", [""] + editable_columns)
329
+ # column_name = st.selectbox("Select a column to edit:", editable_columns)
330
+
331
+ # Form for input fields
332
+ with st.form(key='rule_form'):
333
+ # format_options = ["verbatim transcription", "spell check transcription", "boolean yes no", "boolean 1 0", "integer", "[list]", "yyyy-mm-dd"]
334
+ # current_rule["format"] = st.selectbox("Format:", format_options, index=format_options.index(current_rule["format"]) if current_rule["format"] else 0)
335
+ # current_rule["null_value"] = st.text_input("Null value:", value=current_rule["null_value"])
336
+ if st.session_state['current_rule']:
337
+ current_rule_description = st.text_area("Description of category:", value=st.session_state['rules'][st.session_state['current_rule']])
338
+ else:
339
+ current_rule_description = ''
340
+ commit_button = st.form_submit_button("Commit Column")
341
+
342
+ # Handle commit action
343
+ if commit_button and st.session_state['current_rule']:
344
+ # Commit the rules to the session state.
345
+ st.session_state['rules'][st.session_state['current_rule']] = current_rule_description
346
+ st.success(f"Column '{st.session_state['current_rule']}' added/updated in rules.")
347
+
348
+ # Force the form to reset by clearing the fields from the session state
349
+ st.session_state.pop('current_rule', None) # Clear the selected column to force reset
350
+
351
+ delete_column_name = st.selectbox("Select a column to delete:", [""] + removable_columns)
352
+ # with del_colbtn:
353
+ # st.write('##')
354
+ if st.button("Delete Column") and delete_column_name:
355
+ del st.session_state['rules'][delete_column_name]
356
+ st.success(f"Column '{delete_column_name}' removed from rules.")
357
+
358
+ with col_right:
359
+ # Display the current state of the JSON rules
360
+ st.subheader('Formatted Columns')
361
+ st.json(st.session_state['rules'])
362
+
363
+ st.write('---')
364
+
365
+ col_left_mapping, col_right_mapping = st.columns([6,4])
366
+ with col_left_mapping:
367
+ st.header("Mapping")
368
+ st.write("Assign each column name to a single category.")
369
+ st.session_state['refresh_mapping'] = False
370
+
371
+ # Dynamically create a list of all column names that can be assigned
372
+ # This assumes that the column names are the keys in the dictionary under 'rules'
373
+ all_column_names = list(st.session_state['rules'].keys())
374
+
375
+ categories = ['TAXONOMY', 'GEOGRAPHY', 'LOCALITY', 'COLLECTING', 'MISC']
376
+ if ('mapping' not in st.session_state) or (st.session_state['mapping'] == {}):
377
+ st.session_state['mapping'] = {category: [] for category in categories}
378
+ for category in categories:
379
+ # Filter out the already assigned columns
380
+ available_columns = [col for col in all_column_names if col not in st.session_state['assigned_columns'] or col in st.session_state['mapping'].get(category, [])]
381
+
382
+ # Ensure the current mapping is a subset of the available options
383
+ current_mapping = [col for col in st.session_state['mapping'].get(category, []) if col in available_columns]
384
+
385
+ # Provide a safe default if the current mapping is empty or contains invalid options
386
+ safe_default = current_mapping if all(col in available_columns for col in current_mapping) else []
387
+
388
+ # Create a multi-select widget for the category with a safe default
389
+ selected_columns = st.multiselect(
390
+ f"Select columns for {category}:",
391
+ available_columns,
392
+ default=safe_default,
393
+ key=f"mapping_{category}"
394
+ )
395
+ # Update the assigned_columns based on the selections
396
+ for col in current_mapping:
397
+ if col not in selected_columns and col in st.session_state['assigned_columns']:
398
+ st.session_state['assigned_columns'].remove(col)
399
+ st.session_state['refresh_mapping'] = True
400
+
401
+ for col in selected_columns:
402
+ if col not in st.session_state['assigned_columns']:
403
+ st.session_state['assigned_columns'].append(col)
404
+ st.session_state['refresh_mapping'] = True
405
+
406
+ # Update the mapping in session state when there's a change
407
+ st.session_state['mapping'][category] = selected_columns
408
+ if st.session_state['refresh_mapping']:
409
+ st.session_state['refresh_mapping'] = False
410
+
411
+ # Button to confirm and save the mapping configuration
412
+ if st.button('Confirm Mapping'):
413
+ if check_unique_mapping_assignments():
414
+ # Proceed with further actions since the mapping is confirmed and unique
415
+ pass
416
+
417
+ with col_right_mapping:
418
+ # Display the current state of the JSON rules
419
+ st.subheader('Formatted Column Maps')
420
+ st.json(st.session_state['mapping'])
421
+
422
+
423
+ col_left_save, col_right_save = st.columns([6,4])
424
+ with col_left_save:
425
+ # Input for new file name
426
+ new_filename = st.text_input("Enter filename to save your prompt as a configuration YAML:",placeholder='my_prompt_name')
427
+ # Button to save the new YAML file
428
+ if st.button('Save YAML', type='primary'):
429
+ if new_filename:
430
+ if check_unique_mapping_assignments():
431
+ if check_prompt_yaml_filename(new_filename):
432
+ save_prompt_yaml(new_filename, col_left_save)
433
+ else:
434
+ st.error("File name can only contain letters, numbers, underscores, and dashes. Cannot contain spaces.")
435
+ else:
436
+ st.error("Mapping contains an error. Make sure that each column is assigned to only ***one*** category.")
437
+ else:
438
+ st.error("Please enter a filename.")
439
+
440
+ if st.button('Exit'):
441
+ st.session_state.proceed_to_build_llm_prompt = False
442
+ st.session_state.proceed_to_main = True
443
+ st.rerun()
444
+
445
+
446
+ with col_prompt_main_right:
447
+ if st.session_state['user_clicked_load_prompt_yaml'] is None: # see if user has loaded a yaml to edit
448
+ st.session_state['show_prompt_name_e'] = f"Prompt Status :arrow_forward: Building prompt from scratch"
449
+ if st.session_state['prompt_name']:
450
+ st.session_state['show_prompt_name_w'] = f"New Prompt Name :arrow_forward: {st.session_state['prompt_name']}.yaml"
451
+ else:
452
+ st.session_state['show_prompt_name_w'] = f"New Prompt Name :arrow_forward: [PLEASE SET NAME]"
453
+ else:
454
+ st.session_state['show_prompt_name_e'] = f"Prompt Status: Editing :arrow_forward: {st.session_state['selected_yaml_file']}"
455
+ if st.session_state['prompt_name']:
456
+ st.session_state['show_prompt_name_w'] = f"New Prompt Name :arrow_forward: {st.session_state['prompt_name']}.yaml"
457
+ else:
458
+ st.session_state['show_prompt_name_w'] = f"New Prompt Name :arrow_forward: [PLEASE SET NAME]"
459
+
460
+ st.subheader(f'Full Prompt')
461
+ st.write(st.session_state['show_prompt_name_e'])
462
+ st.write(st.session_state['show_prompt_name_w'])
463
+ st.write("---")
464
+ st.session_state['prompt_info'] = {
465
+ 'prompt_author': st.session_state['prompt_author'],
466
+ 'prompt_author_institution': st.session_state['prompt_author_institution'],
467
+ 'prompt_name': st.session_state['prompt_name'],
468
+ 'prompt_version': st.session_state['prompt_version'],
469
+ 'prompt_description': st.session_state['prompt_description'],
470
+ 'LLM': st.session_state['LLM'],
471
+ 'instructions': st.session_state['instructions'],
472
+ 'json_formatting_instructions': st.session_state['json_formatting_instructions'],
473
+ 'rules': st.session_state['rules'],
474
+ 'mapping': st.session_state['mapping'],
475
+ }
476
+ st.json(st.session_state['prompt_info'])
477
+
478
+ build_LLM_prompt_config()
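
A minimal sketch of how a file saved by the builder above could be re-validated outside Streamlit. The key list mirrors the form fields assembled in build_LLM_prompt_config(); the loader function itself is illustrative and not part of this commit.

import os
import yaml

def load_custom_prompt(dir_prompt, filename):
    # Hypothetical helper: the expected keys come from st.session_state['prompt_info'] above
    expected = ['prompt_author', 'prompt_author_institution', 'prompt_name',
                'prompt_version', 'prompt_description', 'LLM', 'instructions',
                'json_formatting_instructions', 'rules', 'mapping']
    with open(os.path.join(dir_prompt, filename), 'r', encoding='utf-8') as f:
        prompt_info = yaml.safe_load(f)
    missing = [k for k in expected if k not in prompt_info]
    if missing:
        raise ValueError(f"Prompt YAML is missing keys: {missing}")
    return prompt_info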
pages/report_bugs.py ADDED
@@ -0,0 +1,19 @@
+ import os
+ import streamlit as st
+ import streamlit.components.v1 as components
+
+ st.set_page_config(layout="wide", page_icon='img/icon.ico', page_title='VV Report Bugs', initial_sidebar_state="collapsed")
+
+ def display_report():
+     c1, c2, c3 = st.columns([4,6,1])
+     with c3:
+         st.page_link('app.py', label="Home", icon="🏠")
+         st.page_link(os.path.join("pages","faqs.py"), label="FAQs", icon="❔")
+         st.page_link(os.path.join("pages","report_bugs.py"), label="Report a Bug", icon="⚠️")
+
+     with c2:
+         st.write('To report a bug or request a new feature please fill out this [Google Form](https://docs.google.com/forms/d/e/1FAIpQLSdtW1z9Q1pGZTo5W9UeCa6PlQanP-b88iNKE6zsusRI78Itsw/viewform?usp=sf_link)')
+         components.iframe("https://docs.google.com/forms/d/e/1FAIpQLSdtW1z9Q1pGZTo5W9UeCa6PlQanP-b88iNKE6zsusRI78Itsw/viewform?embedded=true", height=700, scrolling=True, width=640)
+
+
+ display_report()
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
run_VoucherVision.py CHANGED
@@ -1,15 +1,10 @@
  import streamlit.web.cli as stcli
  import os, sys
 
- # Insert a file uploader that accepts multiple files at a time:
- # import streamlit as st
- # uploaded_files = st.file_uploader("Choose a CSV file", accept_multiple_files=True)
- # for uploaded_file in uploaded_files:
- #     bytes_data = uploaded_file.read()
- #     st.write("filename:", uploaded_file.name)
- #     st.write(bytes_data)
-
  # pip install protobuf==3.20.0
+ # pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu117 nope
+ # pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118
+
 
 
  def resolve_path(path):
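
For context, resolve_path feeds the standard pattern for launching a Streamlit app through stcli. A hedged sketch of that pattern follows; the argv construction is an assumption, not this file's verbatim tail.

import os, sys
import streamlit.web.cli as stcli

def resolve_path(path):
    # Make the app path absolute so the launcher works from any directory
    return os.path.abspath(os.path.join(os.getcwd(), path))

if __name__ == "__main__":
    sys.argv = ["streamlit", "run", resolve_path("app.py")]  # illustrative only
    sys.exit(stcli.main())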
vouchervision/LLM_crew_OpenAI.py ADDED
@@ -0,0 +1,130 @@
+ import os
+ from crewai import Agent, Task, Crew, Process
+ from langchain_community.tools import DuckDuckGoSearchRun
+ from langchain_openai import ChatOpenAI
+
+
+ class AIResearchCrew:
+     def __init__(self, openai_api_key, OCR, JSON_rules, search_tool=None, llm=None):
+         # Set the OpenAI API key
+         os.environ["OPENAI_API_KEY"] = openai_api_key
+
+         # Initialize the search tool, defaulting to DuckDuckGoSearchRun if not provided
+         self.search_tool = search_tool if search_tool is not None else DuckDuckGoSearchRun()
+
+         # Initialize the LLM (large language model), if provided
+         self.llm = llm
+
+         # Define the agents
+         self.transcriber = Agent(
+             role='Expert Text Parser',
+             goal='Parse and rearrange unstructured OCR text into a standardized JSON dictionary',
+             backstory="""You work at a museum transcribing specimen labels.
+             Your expertise lies in precisely transcribing text and placing the text into the appropriate category.""",
+             verbose=True,
+             allow_delegation=False
+             # Optionally include llm=self.llm here if an LLM was provided
+         )
+
+         self.spell_check = Agent(
+             role='Spell Checker',
+             goal='Correct any typos in the JSON key values',
+             backstory="""Your job is to look at the JSON key values and use your knowledge to verify spelling. Your corrections should be incorporated into the JSON object that will be passed to the next employee, so return the spell-checked JSON dictionary or the previous JSON dictionary if no changes are required.""",
+             verbose=True,
+             allow_delegation=True,
+             # Optionally include llm=self.llm here if an LLM was provided
+         )
+
+         self.fact_check = Agent(
+             role='Fact Checker',
+             goal='Verify the accuracy of taxonomy and location names',
+             backstory="""Your job is to verify the plant taxonomy and geographic locations contained within the key values are accurate. You can use internet searches to check these fields. Your corrections should be incorporated into a new JSON object that will be passed to the next employee, so return the corrected JSON dictionary or the previous JSON dictionary if no changes are required.""",
+             verbose=True,
+             allow_delegation=True,
+             tools=[self.search_tool]
+             # Optionally include llm=self.llm here if an LLM was provided
+         )
+
+         self.validator = Agent(
+             role='Synthesis',
+             goal='Create a final museum JSON record',
+             backstory="""You must produce a final JSON dictionary only.""",
+             verbose=True,
+             allow_delegation=True,
+         )
+
+         # Define the tasks
+         self.task1 = Task(
+             description=f"Use your knowledge to reformat, transform, and rearrange the unstructured text to fit the following requirements:{JSON_rules}. For null values, use an empty string. This is the unformatted OCR text: {OCR}",
+             agent=self.transcriber
+         )
+
+         self.task2 = Task(
+             description="The original text is OCR text, which may contain minor typos. Your job is to check all of the key values and fix any minor typos or spelling mistakes. You should remove any extraneous characters that should not belong in an official museum record.",
+             agent=self.spell_check
+         )
+
+         self.task3 = Task(
+             description="""Use your knowledge or search the internet to verify the information contained within the JSON dictionary.
+             For taxonomy, use the information contained in these keys: order, family, scientificName, scientificNameAuthorship, genus, specificEpithet, infraspecificEpithet.
+             For geography, use the information contained in these keys: country, stateProvince, municipality, decimalLatitude, decimalLongitude.""",
+             agent=self.fact_check
+         )
+
+         self.task4 = Task(
+             description="Verify that the JSON dictionary is valid. If not, correct the error. Then print out the final JSON dictionary only without explanations.",
+             agent=self.validator
+         )
+
+         # Create the crew
+         # self.crew = Crew(
+         #     agents=[self.transcriber, self.spell_check, self.fact_check, self.validator],
+         #     tasks=[self.task1, self.task2, self.task3, self.task4],
+         #     verbose=2,  # You can set it to 1 or 2 for different logging levels
+         #     manager_llm=ChatOpenAI(temperature=0, model="gpt-4-1106-preview"),
+         #     process=Process.hierarchical,
+         # )
+         self.crew = Crew(
+             agents=[self.transcriber, self.validator],
+             tasks=[self.task1, self.task4],
+             manager_llm=ChatOpenAI(temperature=0, model="gpt-4-1106-preview"),
+             process=Process.sequential,
+             verbose=2  # You can set it to 1 or 2 for different logging levels
+         )
+
+     def execute_tasks(self):
+         # Kick off the process and return the result
+         result = self.crew.kickoff()
+         print("######################")
+         print(result)
+         return result
+
+ if __name__ == "__main__":
+     openai_api_key = ""
+     OCR = "HERBARIUM OF MARYGROVE COLLEGE Name Carex scoparia V. condensa Fernald Locality Interlaken , Ind . Date 7/20/25 No ... ! Gerould Wilhelm & Laura Rericha \" Interlaken , \" was the site for many years of St. Joseph Novitiate , run by the Brothers of the Holy Cross . The buildings were on the west shore of Silver Lake , about 2 miles NE of Rolling Prairie , LaPorte Co. Indiana , ca. 41.688 \u00b0 N , 86.601 \u00b0 W Collector : Sister M. Vincent de Paul McGivney February 1 , 2011 THE UNIVERS Examined for the Flora of the Chicago Region OF 1817 MICH ! Ciscoparia SMVdeP University of Michigan Herbarium 1386297 copyright reserved cm Collector wortet 2010"
+     JSON_rules = """This is the JSON template that includes instructions for each key
+     {'catalogNumber': barcode identifier, at least 6 digits, fewer than 30 digits.,
+     'order': full scientific name of the Order in which the taxon is classified. Order must be capitalized.,
+     'family': full scientific name of the Family in which the taxon is classified. Family must be capitalized.,
+     'scientificName': scientific name of the taxon including Genus, specific epithet, and any lower classifications.,
+     'scientificNameAuthorship': authorship information for the scientificName formatted according to the conventions of the applicable Darwin Core nomenclaturalCode.,
+     'genus': taxonomic determination to Genus, Genus must be capitalized.,
+     'subgenus': name of the subgenus.,
+     'specificEpithet': The name of the first or species epithet of the scientificName. Only include the species epithet.,
+     'infraspecificEpithet': lowest or terminal infraspecific epithet of the scientificName.,
+     'identifiedBy': list of names of people, doctors, professors, groups, or organizations who identified, determined the taxon name to the subject organism. This is not the specimen collector., recordedBy list of names of people, doctors, professors, groups, or organizations.,
+     'recordNumber': identifier given to the specimen at the time it was recorded.,
+     'verbatimEventDate': The verbatim original representation of the date and time information for when the specimen was collected.,
+     'eventDate': collection date formatted as year-month-day YYYY-MM-DD., habitat habitat.,
+     'occurrenceRemarks': all descriptive text in the OCR rearranged into sensible sentences or sentence fragments.,
+     'country': country or major administrative unit.,
+     'stateProvince': state, province, canton, department, region, etc., county county, shire, department, parish etc.,
+     'municipality': city, municipality, etc., locality description of geographic information aiding in pinpointing the exact origin or location of the specimen.,
+     'degreeOfEstablishment': cultivated plants are intentionally grown by humans. Use either - unknown or cultivated.,
+     'decimalLatitude': latitude decimal coordinate.,
+     'decimalLongitude': longitude decimal coordinate., verbatimCoordinates verbatim location coordinates.,
+     'minimumElevationInMeters': minimum elevation or altitude in meters.,
+     'maximumElevationInMeters': maximum elevation or altitude in meters.}"""
+     ai_research_crew = AIResearchCrew(openai_api_key, OCR, JSON_rules)
+     result = ai_research_crew.execute_tasks()
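
Since __init__ accepts an optional llm, a caller can pin the agents to a specific model. A hedged usage sketch follows; the key and argument values are placeholders, and note that the class currently stores self.llm without passing it to the Agents (see the "Optionally include llm=self.llm" comments above).

from langchain_openai import ChatOpenAI

custom_llm = ChatOpenAI(temperature=0, model="gpt-4-1106-preview")  # example model only
crew = AIResearchCrew(openai_api_key="YOUR_KEY",
                      OCR="...raw OCR text...",
                      JSON_rules="...template with per-key instructions...",
                      llm=custom_llm)
result = crew.execute_tasks()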
vouchervision/LLM_local_cpu_MistralAI.py CHANGED
@@ -56,8 +56,6 @@ class LocalCPUMistralHandler:
              raise f"Unsupported GGUF model name"
 
          # self.model_id = f"mistralai/{self.model_name}"
-         self.gpu_usage = {'max_load': 0, 'max_memory_usage': 0, 'monitoring': True}
-
          self.starting_temp = float(self.STARTING_TEMP)
          self.temp_increment = float(0.2)
          self.adjust_temp = self.starting_temp
vouchervision/OCR_CRAFT.py ADDED
@@ -0,0 +1,55 @@
+ # import Craft class
+ from craft_text_detector import read_image, load_craftnet_model, load_refinenet_model, get_prediction, export_detected_regions, export_extra_results, empty_cuda_cache
+
+ def main2():
+     # set image path and export folder directory
+     # image = 'D:/Dropbox/SLTP/benchmark_datasets/SLTP_B50_MICH_Angiospermae2/img/MICH_7375774_Polygonaceae_Persicaria_.jpg'  # can be filepath, PIL image or numpy array
+     # image = 'C:/Users/Will/Downloads/test_2024_02_07__14-59-52/Original_Images/SJRw 00891 - 01141__10001.jpg'
+     image = 'D:/Dropbox/VoucherVision/demo/demo_images/MICH_16205594_Poaceae_Jouvea_pilosa.jpg'
+     output_dir = 'D:/D_Desktop/test_out_CRAFT'
+
+     # read image
+     image = read_image(image)
+
+     # load models
+     refine_net = load_refinenet_model(cuda=True)
+     craft_net = load_craftnet_model(weight_path='D:/Dropbox/VoucherVision/vouchervision/craft/craft_mlt_25k.pth', cuda=True)
+
+     # perform prediction
+     prediction_result = get_prediction(
+         image=image,
+         craft_net=craft_net,
+         refine_net=refine_net,
+         text_threshold=0.4,
+         link_threshold=0.7,
+         low_text=0.4,
+         cuda=True,
+         long_size=1280
+     )
+
+     # export detected text regions
+     exported_file_paths = export_detected_regions(
+         image=image,
+         regions=prediction_result["boxes"],
+         output_dir=output_dir,
+         rectify=True
+     )
+
+     # export heatmap, detection points, box visualization
+     export_extra_results(
+         image=image,
+         regions=prediction_result["boxes"],
+         heatmaps=prediction_result["heatmaps"],
+         output_dir=output_dir
+     )
+
+     # unload models from gpu
+     empty_cuda_cache()
+
+
+ if __name__ == '__main__':
+     # main()
+     main2()
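
For reference, the three detection knobs passed to get_prediction map to CRAFT's score maps (descriptions paraphrase the craft_text_detector documentation; the numbers above are simply what this demo uses): text_threshold is the minimum region score for a pixel to be treated as text, link_threshold is the minimum affinity score for joining neighboring characters into one region, and low_text is the lower bound used when expanding a detected region outward. Raising link_threshold therefore tends to split text into more, smaller boxes.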
vouchervision/OCR_google_cloud_vision.py CHANGED
@@ -1,4 +1,4 @@
1
- import os, io, sys, inspect, statistics, json
2
  from statistics import mean
3
  # from google.cloud import vision, storage
4
  from google.cloud import vision
@@ -8,10 +8,16 @@ import colorsys
8
  from tqdm import tqdm
9
  from google.oauth2 import service_account
10
 
11
- # currentdir = os.path.dirname(os.path.abspath(
12
- # inspect.getfile(inspect.currentframe())))
13
- # parentdir = os.path.dirname(currentdir)
14
- # sys.path.append(parentdir)
 
 
 
 
 
 
15
 
16
 
17
  '''
@@ -23,19 +29,31 @@ from google.oauth2 import service_account
23
  archivePrefix={arXiv},
24
  primaryClass={cs.CL}
25
  }
 
 
 
 
 
 
 
26
  '''
27
 
28
- class OCRGoogle:
29
 
30
  BBOX_COLOR = "black"
31
 
32
- def __init__(self, is_hf, path, cfg, trOCR_model_version, trOCR_model, trOCR_processor, device):
33
  self.is_hf = is_hf
 
 
 
34
 
35
  self.path = path
36
  self.cfg = cfg
37
  self.do_use_trOCR = self.cfg['leafmachine']['project']['do_use_trOCR']
38
  self.OCR_option = self.cfg['leafmachine']['project']['OCR_option']
 
 
39
 
40
  # Initialize TrOCR components
41
  self.trOCR_model_version = trOCR_model_version
@@ -70,6 +88,9 @@ class OCRGoogle:
70
  self.trOCR_confidences = None
71
  self.trOCR_characters = None
72
  self.set_client()
 
 
 
73
 
74
 
75
  def set_client(self):
@@ -86,6 +107,131 @@ class OCRGoogle:
86
  credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
87
  return credentials
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  def detect_text_with_trOCR_using_google_bboxes(self, do_use_trOCR, logger):
91
  CONFIDENCES = 0.80
@@ -93,33 +239,36 @@ class OCRGoogle:
93
 
94
  self.OCR_JSON_to_file = {}
95
 
 
96
  if not do_use_trOCR:
97
- if self.OCR_option in ['normal',]:
98
  self.OCR_JSON_to_file['OCR_printed'] = self.normal_organized_text
99
  logger.info(f"Google_OCR_Standard:\n{self.normal_organized_text}")
100
- return f"Google_OCR_Standard:\n{self.normal_organized_text}"
 
101
 
102
- if self.OCR_option in ['hand',]:
103
  self.OCR_JSON_to_file['OCR_handwritten'] = self.hand_organized_text
104
  logger.info(f"Google_OCR_Handwriting:\n{self.hand_organized_text}")
105
- return f"Google_OCR_Handwriting:\n{self.hand_organized_text}"
106
-
107
- if self.OCR_option in ['both',]:
108
- logger.info(f"Google_OCR_Standard:\n{self.normal_organized_text}\n\nGoogle_OCR_Handwriting:\n{self.hand_organized_text}")
109
- return f"Google_OCR_Standard:\n{self.normal_organized_text}\n\nGoogle_OCR_Handwriting:\n{self.hand_organized_text}"
110
 
 
 
 
 
111
  else:
112
  logger.info(f'Supplementing with trOCR')
113
 
114
  self.trOCR_texts = []
115
  original_image = Image.open(self.path).convert("RGB")
116
 
117
- if self.OCR_option in ['normal',]:
118
  available_bounds = self.normal_bounds_word
119
- elif self.OCR_option in ['hand',]:
120
- available_bounds = self.hand_bounds_word
121
- elif self.OCR_option in ['both',]:
122
  available_bounds = self.hand_bounds_word
 
 
123
  else:
124
  raise
125
 
@@ -127,9 +276,13 @@ class OCRGoogle:
127
  characters = []
128
  height = []
129
  confidences = []
 
 
130
  for bound in tqdm(available_bounds, desc="Processing words using Google Vision bboxes"):
 
 
 
131
  vertices = bound["vertices"]
132
-
133
 
134
  left = min([v["x"] for v in vertices])
135
  top = min([v["y"] for v in vertices])
@@ -177,24 +330,31 @@ class OCRGoogle:
177
  self.trOCR_confidences = confidences
178
  self.trOCR_characters = characters
179
 
180
- if self.OCR_option in ['normal',]:
181
  self.OCR_JSON_to_file['OCR_printed'] = self.normal_organized_text
182
  self.OCR_JSON_to_file['OCR_trOCR'] = self.trOCR_texts
183
  logger.info(f"Google_OCR_Standard:\n{self.normal_organized_text}\n\ntrOCR:\n{self.trOCR_texts}")
184
- return f"Google_OCR_Standard:\n{self.normal_organized_text}\n\ntrOCR:\n{self.trOCR_texts}"
185
- if self.OCR_option in ['hand',]:
 
186
  self.OCR_JSON_to_file['OCR_handwritten'] = self.hand_organized_text
187
  self.OCR_JSON_to_file['OCR_trOCR'] = self.trOCR_texts
188
  logger.info(f"Google_OCR_Handwriting:\n{self.hand_organized_text}\n\ntrOCR:\n{self.trOCR_texts}")
189
- return f"Google_OCR_Handwriting:\n{self.hand_organized_text}\n\ntrOCR:\n{self.trOCR_texts}"
190
- if self.OCR_option in ['both',]:
191
- self.OCR_JSON_to_file['OCR_printed'] = self.normal_organized_text
192
- self.OCR_JSON_to_file['OCR_handwritten'] = self.hand_organized_text
193
- self.OCR_JSON_to_file['OCR_trOCR'] = self.trOCR_texts
194
- logger.info(f"Google_OCR_Standard:\n{self.normal_organized_text}\n\nGoogle_OCR_Handwriting:\n{self.hand_organized_text}\n\ntrOCR:\n{self.trOCR_texts}")
195
- return f"Google_OCR_Standard:\n{self.normal_organized_text}\n\nGoogle_OCR_Handwriting:\n{self.hand_organized_text}\n\ntrOCR:\n{self.trOCR_texts}"
196
- else:
197
- raise
 
 
 
 
 
 
198
 
199
  @staticmethod
200
  def confidence_to_color(confidence):
@@ -220,7 +380,7 @@ class OCRGoogle:
220
  if option == 'trOCR':
221
  color = (0, 170, 255)
222
  else:
223
- color = OCRGoogle.confidence_to_color(confidence)
224
  position = (bound["vertices"][0]["x"], bound["vertices"][0]["y"] - char_height)
225
  draw.text(position, character, fill=color, font=font)
226
 
@@ -258,13 +418,13 @@ class OCRGoogle:
258
  bound["vertices"][2]["x"], bound["vertices"][2]["y"],
259
  bound["vertices"][3]["x"], bound["vertices"][3]["y"],
260
  ],
261
- outline=OCRGoogle.BBOX_COLOR,
262
  width=line_width_thin
263
  )
264
 
265
  # Draw a line segment at the bottom of each handwritten character
266
  for bound, confidence in zip(bounds, confidences):
267
- color = OCRGoogle.confidence_to_color(confidence)
268
  # Use the bottom two vertices of the bounding box for the line
269
  bottom_left = (bound["vertices"][3]["x"], bound["vertices"][3]["y"] + line_width_thick)
270
  bottom_right = (bound["vertices"][2]["x"], bound["vertices"][2]["y"] + line_width_thick)
@@ -386,6 +546,7 @@ class OCRGoogle:
386
  self.normal_height = height_flat
387
  self.normal_confidences = confidences
388
  self.normal_characters = characters
 
389
 
390
 
391
  def detect_handwritten_ocr(self):
@@ -503,56 +664,112 @@ class OCRGoogle:
503
  self.hand_height = height_flat
504
  self.hand_confidences = confidences
505
  self.hand_characters = characters
 
506
 
507
 
508
  def process_image(self, do_create_OCR_helper_image, logger):
509
- if self.OCR_option in ['normal', 'both']:
510
- self.detect_text()
511
- if self.OCR_option in ['hand', 'both']:
512
- self.detect_handwritten_ocr()
513
- if self.OCR_option not in ['normal', 'hand', 'both']:
514
- self.OCR_option = 'both'
515
- self.detect_text()
516
- self.detect_handwritten_ocr()
517
-
518
- ### Optionally add trOCR to the self.OCR for additional context
519
- self.OCR = self.detect_text_with_trOCR_using_google_bboxes(self.do_use_trOCR, logger)
520
- logger.info(f"OCR:\n{self.OCR}")
521
-
522
- if do_create_OCR_helper_image:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
523
  self.image = Image.open(self.path)
524
 
525
- if self.OCR_option in ['normal', 'both']:
526
  image_with_boxes_normal = self.draw_boxes('normal')
527
  text_image_normal = self.render_text_on_black_image('normal')
528
  self.merged_image_normal = self.merge_images(image_with_boxes_normal, text_image_normal)
529
 
530
- if self.OCR_option in ['hand', 'both']:
531
  image_with_boxes_hand = self.draw_boxes('hand')
532
  text_image_hand = self.render_text_on_black_image('hand')
533
  self.merged_image_hand = self.merge_images(image_with_boxes_hand, text_image_hand)
534
 
535
  if self.do_use_trOCR:
536
- text_image_trOCR = self.render_text_on_black_image('trOCR')
 
 
 
 
 
 
537
 
538
  ### Merge final overlay image
539
  ### [original, normal bboxes, normal text]
540
- if self.OCR_option in ['normal']:
541
  self.overlay_image = self.merge_images(Image.open(self.path), self.merged_image_normal)
542
  ### [original, hand bboxes, hand text]
543
- elif self.OCR_option in ['hand']:
544
  self.overlay_image = self.merge_images(Image.open(self.path), self.merged_image_hand)
545
  ### [original, normal bboxes, normal text, hand bboxes, hand text]
546
  else:
547
  self.overlay_image = self.merge_images(Image.open(self.path), self.merge_images(self.merged_image_normal, self.merged_image_hand))
548
 
549
  if self.do_use_trOCR:
550
- self.overlay_image = self.merge_images(self.overlay_image, text_image_trOCR)
 
 
 
 
 
 
 
551
 
552
  else:
553
  self.merged_image_normal = None
554
  self.merged_image_hand = None
555
  self.overlay_image = Image.open(self.path)
 
 
 
 
 
 
556
 
557
 
558
  '''
 
1
+ import os, io, sys, inspect, statistics, json, cv2
2
  from statistics import mean
3
  # from google.cloud import vision, storage
4
  from google.cloud import vision
 
8
  from tqdm import tqdm
9
  from google.oauth2 import service_account
10
 
11
+ ### LLaVA should only be installed if the user will actually use it.
12
+ ### It requires the most recent pytorch/Python and can mess with older systems
13
+ try:
14
+ from craft_text_detector import read_image, load_craftnet_model, load_refinenet_model, get_prediction, export_detected_regions, export_extra_results, empty_cuda_cache
15
+ except:
16
+ pass
17
+ try:
18
+ from OCR_llava import OCRllava
19
+ except:
20
+ pass
21
 
22
 
23
  '''
 
29
  archivePrefix={arXiv},
30
  primaryClass={cs.CL}
31
  }
32
+ @inproceedings{baek2019character,
33
+ title={Character Region Awareness for Text Detection},
34
+ author={Baek, Youngmin and Lee, Bado and Han, Dongyoon and Yun, Sangdoo and Lee, Hwalsuk},
35
+ booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
36
+ pages={9365--9374},
37
+ year={2019}
38
+ }
39
  '''
40
 
41
+ class OCREngine:
42
 
43
  BBOX_COLOR = "black"
44
 
45
+ def __init__(self, logger, json_report, dir_home, is_hf, path, cfg, trOCR_model_version, trOCR_model, trOCR_processor, device):
46
  self.is_hf = is_hf
47
+ self.logger = logger
48
+
49
+ self.json_report = json_report
50
 
51
  self.path = path
52
  self.cfg = cfg
53
  self.do_use_trOCR = self.cfg['leafmachine']['project']['do_use_trOCR']
54
  self.OCR_option = self.cfg['leafmachine']['project']['OCR_option']
55
+ self.double_OCR = self.cfg['leafmachine']['project']['double_OCR']
56
+ self.dir_home = dir_home
57
 
58
  # Initialize TrOCR components
59
  self.trOCR_model_version = trOCR_model_version
 
88
  self.trOCR_confidences = None
89
  self.trOCR_characters = None
90
  self.set_client()
91
+ self.init_craft()
92
+ if 'LLaVA' in self.OCR_option:
93
+ self.init_llava()
94
 
95
 
96
  def set_client(self):
 
107
  credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
108
  return credentials
109
 
110
+ def init_craft(self):
111
+ if 'CRAFT' in self.OCR_option:
112
+ try:
113
+ self.refine_net = load_refinenet_model(cuda=True)
114
+ self.use_cuda = True
115
+ except:
116
+ self.refine_net = load_refinenet_model(cuda=False)
117
+ self.use_cuda = False
118
+
119
+ if self.use_cuda:
120
+ self.craft_net = load_craftnet_model(weight_path=os.path.join(self.dir_home,'vouchervision','craft','craft_mlt_25k.pth'), cuda=True)
121
+ else:
122
+ self.craft_net = load_craftnet_model(weight_path=os.path.join(self.dir_home,'vouchervision','craft','craft_mlt_25k.pth'), cuda=False)
123
+
124
+ def init_llava(self):
125
+
126
+ self.llava_prompt = """I need you to transcribe all of the text in this image.
127
+ Place the transcribed text into a JSON dictionary with this form {"Transcription_Printed_Text": "text","Transcription_Handwritten_Text": "text"}"""
128
+
129
+ self.model_path = "liuhaotian/" + self.cfg['leafmachine']['project']['OCR_option_llava']
130
+ self.model_quant = self.cfg['leafmachine']['project']['OCR_option_llava_bit']
131
+
132
+ self.json_report.set_text(text_main=f'Loading LLaVA model: {self.model_path} Quantization: {self.model_quant}')
133
+
134
+ if self.model_quant == '4bit':
135
+ use_4bit = True
136
+ elif self.model_quant == 'full':
137
+ use_4bit = False
138
+ else:
139
+ self.logger.info(f"Provided model quantization invlid. Using 4bit.")
140
+ use_4bit = True
141
+
142
+ self.Llava = OCRllava(self.logger, model_path=self.model_path, load_in_4bit=use_4bit, load_in_8bit=False)
143
+
144
+
145
+ def detect_text_craft(self):
146
+ # Perform prediction using CRAFT
147
+ image = read_image(self.path)
148
+
149
+ link_threshold = 0.85
150
+ text_threshold = 0.4
151
+ low_text = 0.4
152
+
153
+ if self.use_cuda:
154
+ self.prediction_result = get_prediction(
155
+ image=image,
156
+ craft_net=self.craft_net,
157
+ refine_net=self.refine_net,
158
+ text_threshold=text_threshold,
159
+ link_threshold=link_threshold,
160
+ low_text=low_text,
161
+ cuda=True,
162
+ long_size=1280
163
+ )
164
+ else:
165
+ self.prediction_result = get_prediction(
166
+ image=image,
167
+ craft_net=self.craft_net,
168
+ refine_net=self.refine_net,
169
+ text_threshold=text_threshold,
170
+ link_threshold=link_threshold,
171
+ low_text=low_text,
172
+ cuda=False,
173
+ long_size=1280
174
+ )
175
+
176
+ # Initialize metadata structures
177
+ bounds = []
178
+ bounds_word = [] # CRAFT gives bounds for text regions, not individual words
179
+ text_to_box_mapping = []
180
+ bounds_flat = []
181
+ height_flat = []
182
+ confidences = [] # CRAFT does not provide confidences per character, so this might be uniformly set or estimated
183
+ characters = [] # Simulating as CRAFT doesn't provide character-level details
184
+ organized_text = ""
185
+
186
+ total_b = len(self.prediction_result["boxes"])
187
+ i=0
188
+ # Process each detected text region
189
+ for box in self.prediction_result["boxes"]:
190
+ i+=1
191
+ self.json_report.set_text(text_main=f'Locating text using CRAFT --- {i}/{total_b}')
192
+
193
+ vertices = [{"x": int(vertex[0]), "y": int(vertex[1])} for vertex in box]
194
+
195
+ # Simulate a mapping for the whole detected region as a word
196
+ text_to_box_mapping.append({
197
+ "vertices": vertices,
198
+ "text": "detected_text" # Placeholder, as CRAFT does not provide the text content directly
199
+ })
200
+
201
+ # Assuming each box is a word for the sake of this example
202
+ bounds_word.append({"vertices": vertices})
203
+
204
+ # For simplicity, we're not dividing text regions into characters as CRAFT doesn't provide this
205
+ # Instead, we create a single large 'character' per detected region
206
+ bounds.append({"vertices": vertices})
207
+
208
+ # Simulate flat bounds and height for each detected region
209
+ x_positions = [vertex["x"] for vertex in vertices]
210
+ y_positions = [vertex["y"] for vertex in vertices]
211
+ min_x, max_x = min(x_positions), max(x_positions)
212
+ min_y, max_y = min(y_positions), max(y_positions)
213
+ avg_height = max_y - min_y
214
+ height_flat.append(avg_height)
215
+
216
+ # Assuming uniform confidence for all detected regions
217
+ confidences.append(1.0) # Placeholder confidence
218
+
219
+ # Adding dummy character for each box
220
+ characters.append("X") # Placeholder character
221
+
222
+ # Organize text as a single string (assuming each box is a word)
223
+ # organized_text += "detected_text " # Placeholder text
224
+
225
+ # Update class attributes with processed data
226
+ self.normal_bounds = bounds
227
+ self.normal_bounds_word = bounds_word
228
+ self.normal_text_to_box_mapping = text_to_box_mapping
229
+ self.normal_bounds_flat = bounds_flat # This would be similar to bounds if not processing characters individually
230
+ self.normal_height = height_flat
231
+ self.normal_confidences = confidences
232
+ self.normal_characters = characters
233
+ self.normal_organized_text = organized_text.strip()
234
+
235
 
236
  def detect_text_with_trOCR_using_google_bboxes(self, do_use_trOCR, logger):
237
  CONFIDENCES = 0.80
 
239
 
240
  self.OCR_JSON_to_file = {}
241
 
242
+ ocr_parts = ''
243
  if not do_use_trOCR:
244
+ if 'normal' in self.OCR_option:
245
  self.OCR_JSON_to_file['OCR_printed'] = self.normal_organized_text
246
  logger.info(f"Google_OCR_Standard:\n{self.normal_organized_text}")
247
+ # ocr_parts = ocr_parts + f"Google_OCR_Standard:\n{self.normal_organized_text}"
248
+ ocr_parts = self.normal_organized_text
249
 
250
+ if 'hand' in self.OCR_option:
251
  self.OCR_JSON_to_file['OCR_handwritten'] = self.hand_organized_text
252
  logger.info(f"Google_OCR_Handwriting:\n{self.hand_organized_text}")
253
+ # ocr_parts = ocr_parts + f"Google_OCR_Handwriting:\n{self.hand_organized_text}"
254
+ ocr_parts = self.hand_organized_text
 
 
 
255
 
256
+ # if self.OCR_option in ['both',]:
257
+ # logger.info(f"Google_OCR_Standard:\n{self.normal_organized_text}\n\nGoogle_OCR_Handwriting:\n{self.hand_organized_text}")
258
+ # return f"Google_OCR_Standard:\n{self.normal_organized_text}\n\nGoogle_OCR_Handwriting:\n{self.hand_organized_text}"
259
+ return ocr_parts
260
  else:
261
  logger.info(f'Supplementing with trOCR')
262
 
263
  self.trOCR_texts = []
264
  original_image = Image.open(self.path).convert("RGB")
265
 
266
+ if 'normal' in self.OCR_option or 'CRAFT' in self.OCR_option:
267
  available_bounds = self.normal_bounds_word
268
+ elif 'hand' in self.OCR_option:
 
 
269
  available_bounds = self.hand_bounds_word
270
+ # elif self.OCR_option in ['both',]:
271
+ # available_bounds = self.hand_bounds_word
272
  else:
273
  raise
274
 
 
276
  characters = []
277
  height = []
278
  confidences = []
279
+ total_b = len(available_bounds)
280
+ i=0
281
  for bound in tqdm(available_bounds, desc="Processing words using Google Vision bboxes"):
282
+ i+=1
283
+ self.json_report.set_text(text_main=f'Working on trOCR :construction: {i}/{total_b}')
284
+
285
  vertices = bound["vertices"]
 
286
 
287
  left = min([v["x"] for v in vertices])
288
  top = min([v["y"] for v in vertices])
 
330
  self.trOCR_confidences = confidences
331
  self.trOCR_characters = characters
332
 
333
+ if 'normal' in self.OCR_option:
334
  self.OCR_JSON_to_file['OCR_printed'] = self.normal_organized_text
335
  self.OCR_JSON_to_file['OCR_trOCR'] = self.trOCR_texts
336
  logger.info(f"Google_OCR_Standard:\n{self.normal_organized_text}\n\ntrOCR:\n{self.trOCR_texts}")
337
+ # ocr_parts = ocr_parts + f"\nGoogle_OCR_Standard:\n{self.normal_organized_text}\n\ntrOCR:\n{self.trOCR_texts}"
338
+ ocr_parts = self.trOCR_texts
339
+ if 'hand' in self.OCR_option:
340
  self.OCR_JSON_to_file['OCR_handwritten'] = self.hand_organized_text
341
  self.OCR_JSON_to_file['OCR_trOCR'] = self.trOCR_texts
342
  logger.info(f"Google_OCR_Handwriting:\n{self.hand_organized_text}\n\ntrOCR:\n{self.trOCR_texts}")
343
+ # ocr_parts = ocr_parts + f"\nGoogle_OCR_Handwriting:\n{self.hand_organized_text}\n\ntrOCR:\n{self.trOCR_texts}"
344
+ ocr_parts = self.trOCR_texts
345
+ # if self.OCR_option in ['both',]:
346
+ # self.OCR_JSON_to_file['OCR_printed'] = self.normal_organized_text
347
+ # self.OCR_JSON_to_file['OCR_handwritten'] = self.hand_organized_text
348
+ # self.OCR_JSON_to_file['OCR_trOCR'] = self.trOCR_texts
349
+ # logger.info(f"Google_OCR_Standard:\n{self.normal_organized_text}\n\nGoogle_OCR_Handwriting:\n{self.hand_organized_text}\n\ntrOCR:\n{self.trOCR_texts}")
350
+ # ocr_parts = ocr_parts + f"\nGoogle_OCR_Standard:\n{self.normal_organized_text}\n\nGoogle_OCR_Handwriting:\n{self.hand_organized_text}\n\ntrOCR:\n{self.trOCR_texts}"
351
+ if 'CRAFT' in self.OCR_option:
352
+ # self.OCR_JSON_to_file['OCR_printed'] = self.normal_organized_text
353
+ self.OCR_JSON_to_file['OCR_CRAFT_trOCR'] = self.trOCR_texts
354
+ logger.info(f"CRAFT_trOCR:\n{self.trOCR_texts}")
355
+ # ocr_parts = ocr_parts + f"\nCRAFT_trOCR:\n{self.trOCR_texts}"
356
+ ocr_parts = self.trOCR_texts
357
+ return ocr_parts
358
 
359
  @staticmethod
360
  def confidence_to_color(confidence):
 
380
  if option == 'trOCR':
381
  color = (0, 170, 255)
382
  else:
383
+ color = OCREngine.confidence_to_color(confidence)
384
  position = (bound["vertices"][0]["x"], bound["vertices"][0]["y"] - char_height)
385
  draw.text(position, character, fill=color, font=font)
386
 
 
418
  bound["vertices"][2]["x"], bound["vertices"][2]["y"],
419
  bound["vertices"][3]["x"], bound["vertices"][3]["y"],
420
  ],
421
+                    outline=OCREngine.BBOX_COLOR,
                     width=line_width_thin
                 )
 
         # Draw a line segment at the bottom of each handwritten character
         for bound, confidence in zip(bounds, confidences):
+            color = OCREngine.confidence_to_color(confidence)
             # Use the bottom two vertices of the bounding box for the line
             bottom_left = (bound["vertices"][3]["x"], bound["vertices"][3]["y"] + line_width_thick)
             bottom_right = (bound["vertices"][2]["x"], bound["vertices"][2]["y"] + line_width_thick)
 
         self.normal_height = height_flat
         self.normal_confidences = confidences
         self.normal_characters = characters
+        return self.normal_cleaned_text
 
 
     def detect_handwritten_ocr(self):
 
         self.hand_height = height_flat
         self.hand_confidences = confidences
         self.hand_characters = characters
+        return self.hand_cleaned_text
 
 
     def process_image(self, do_create_OCR_helper_image, logger):
+        # Options can be stacked, so use solitary if statements (not elif)
+        self.OCR = 'OCR:\n'
+        if 'CRAFT' in self.OCR_option:
+            self.do_use_trOCR = True
+            self.detect_text_craft()
+            ### Optionally add trOCR to the self.OCR for additional context
+            if self.double_OCR:
+                part_OCR = "\nCRAFT trOCR:\n" + self.detect_text_with_trOCR_using_google_bboxes(self.do_use_trOCR, logger)
+                self.OCR = self.OCR + part_OCR + part_OCR
+            else:
+                self.OCR = self.OCR + "\nCRAFT trOCR:\n" + self.detect_text_with_trOCR_using_google_bboxes(self.do_use_trOCR, logger)
+            logger.info(f"CRAFT trOCR:\n{self.OCR}")
+
+        if 'LLaVA' in self.OCR_option:  # This option does not produce an OCR helper image
+            self.json_report.set_text(text_main=f'Working on LLaVA {self.Llava.model_path} transcription :construction:')
+
+            image, json_output, direct_output, str_output, usage_report = self.Llava.transcribe_image(self.path, self.llava_prompt)
+            self.logger.info(f"LLaVA Usage Report for Model {self.Llava.model_path}:\n{usage_report}")
+
+            try:
+                self.OCR_JSON_to_file['OCR_LLaVA'] = str_output
+            except:
+                self.OCR_JSON_to_file = {}
+                self.OCR_JSON_to_file['OCR_LLaVA'] = str_output
+
+            if self.double_OCR:
+                self.OCR = self.OCR + f"\nLLaVA OCR:\n{str_output}" + f"\nLLaVA OCR:\n{str_output}"
+            else:
+                self.OCR = self.OCR + f"\nLLaVA OCR:\n{str_output}"
+            logger.info(f"LLaVA OCR:\n{self.OCR}")
+
+        if 'normal' in self.OCR_option or 'hand' in self.OCR_option:
+            if 'normal' in self.OCR_option:
+                self.OCR = self.OCR + "\nGoogle Printed OCR:\n" + self.detect_text()
+            if 'hand' in self.OCR_option:
+                self.OCR = self.OCR + "\nGoogle Handwritten OCR:\n" + self.detect_handwritten_ocr()
+            # if self.OCR_option not in ['normal', 'hand', 'both']:
+            #     self.OCR_option = 'both'
+            #     self.detect_text()
+            #     self.detect_handwritten_ocr()
+
+            ### Optionally add trOCR to the self.OCR for additional context
+            if self.double_OCR:
+                part_OCR = "\ntrOCR:\n" + self.detect_text_with_trOCR_using_google_bboxes(self.do_use_trOCR, logger)
+                self.OCR = self.OCR + part_OCR + part_OCR
+            else:
+                self.OCR = self.OCR + "\ntrOCR:\n" + self.detect_text_with_trOCR_using_google_bboxes(self.do_use_trOCR, logger)
+            logger.info(f"OCR:\n{self.OCR}")
+
+        if do_create_OCR_helper_image and ('LLaVA' not in self.OCR_option):
             self.image = Image.open(self.path)
 
+            if 'normal' in self.OCR_option:
                 image_with_boxes_normal = self.draw_boxes('normal')
                 text_image_normal = self.render_text_on_black_image('normal')
                 self.merged_image_normal = self.merge_images(image_with_boxes_normal, text_image_normal)
 
+            if 'hand' in self.OCR_option:
                 image_with_boxes_hand = self.draw_boxes('hand')
                 text_image_hand = self.render_text_on_black_image('hand')
                 self.merged_image_hand = self.merge_images(image_with_boxes_hand, text_image_hand)
 
             if self.do_use_trOCR:
+                text_image_trOCR = self.render_text_on_black_image('trOCR')
+
+                if 'CRAFT' in self.OCR_option:
+                    image_with_boxes_normal = self.draw_boxes('normal')
+                    self.merged_image_normal = self.merge_images(image_with_boxes_normal, text_image_trOCR)
 
             ### Merge final overlay image
             ### [original, normal bboxes, normal text]
+            if 'CRAFT' in self.OCR_option or 'normal' in self.OCR_option:
                 self.overlay_image = self.merge_images(Image.open(self.path), self.merged_image_normal)
             ### [original, hand bboxes, hand text]
+            elif 'hand' in self.OCR_option:
                 self.overlay_image = self.merge_images(Image.open(self.path), self.merged_image_hand)
             ### [original, normal bboxes, normal text, hand bboxes, hand text]
             else:
                 self.overlay_image = self.merge_images(Image.open(self.path), self.merge_images(self.merged_image_normal, self.merged_image_hand))
 
             if self.do_use_trOCR:
+                if 'CRAFT' in self.OCR_option:
+                    heat_map_text = Image.fromarray(cv2.cvtColor(self.prediction_result["heatmaps"]["text_score_heatmap"], cv2.COLOR_BGR2RGB))
+                    heat_map_link = Image.fromarray(cv2.cvtColor(self.prediction_result["heatmaps"]["link_score_heatmap"], cv2.COLOR_BGR2RGB))
+                    self.overlay_image = self.merge_images(self.overlay_image, heat_map_text)
+                    self.overlay_image = self.merge_images(self.overlay_image, heat_map_link)
+
+                else:
+                    self.overlay_image = self.merge_images(self.overlay_image, text_image_trOCR)
 
         else:
             self.merged_image_normal = None
             self.merged_image_hand = None
             self.overlay_image = Image.open(self.path)
+
+        try:
+            empty_cuda_cache()
+        except:
+            pass
 
 
 '''
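
Note on the stacking design: process_image() treats OCR_option as a collection that can name several engines at once ('normal', 'hand', 'CRAFT', 'LLaVA'); each engine's text is appended to one running string, and double_OCR simply appends each block twice. A minimal, self-contained sketch of that accumulation pattern (the engine functions here are placeholders, not the real OCREngine methods):

# Sketch of the OCR_option stacking pattern; engine functions are stand-ins.
def run_printed_ocr():
    return "printed text from Google Vision"

def run_handwritten_ocr():
    return "handwritten text from Google Vision"

ENGINES = {
    'normal': ("Google Printed OCR", run_printed_ocr),
    'hand': ("Google Handwritten OCR", run_handwritten_ocr),
}

def stack_ocr(OCR_option, double_OCR=False):
    OCR = 'OCR:\n'
    for key, (label, engine) in ENGINES.items():
        if key in OCR_option:  # solitary if per engine, so options stack
            part = f"\n{label}:\n{engine()}"
            OCR += part + part if double_OCR else part
    return OCR

print(stack_ocr(['normal', 'hand'], double_OCR=True))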
vouchervision/OCR_llava.py ADDED
@@ -0,0 +1,324 @@
+import os, re, logging
+import requests
+from PIL import Image
+from io import BytesIO
+import torch
+from transformers import AutoTokenizer, BitsAndBytesConfig, TextStreamer
+
+from langchain.prompts import PromptTemplate
+from langchain_core.output_parsers import JsonOutputParser
+from langchain_core.pydantic_v1 import BaseModel, Field
+
+from LLaVA.llava.model import LlavaLlamaForCausalLM
+from LLaVA.llava.model.builder import load_pretrained_model
+from LLaVA.llava.conversation import conv_templates, SeparatorStyle
+from LLaVA.llava.utils import disable_torch_init
+from LLaVA.llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN, IMAGE_PLACEHOLDER
+from LLaVA.llava.mm_utils import tokenizer_image_token, get_model_name_from_path, KeywordsStoppingCriteria, process_images
+
+from utils_LLM import SystemLoadMonitor
+
+'''
+Performance expectations system:
+    GPUs:
+        2x RTX 6000 Ada
+    CPU:
+        AMD Ryzen Threadripper PRO 5975WX, 32 cores / 64 threads
+    RAM:
+        512 GB
+    OS:
+        Ubuntu 22.04.3 LTS
+
+LLaVA Models:
+    "liuhaotian/llava-v1.6-mistral-7b" --- Model is 20 GB in size --- Mistral-7B
+        --- Full
+            --- Inference time ~6 sec
+            --- VRAM ~18 GB
+
+        --- 8bit (don't use; author says there is a problem right now, 2024-02-08; anecdotally worse results too)
+            --- Inference time ~37 sec
+            --- VRAM ~18 GB
+
+        --- 4bit
+            --- Inference time ~15 sec
+            --- VRAM ~9 GB
+
+    "liuhaotian/llava-v1.6-34b" --- Model is 100 GB in size --- Hermes-Yi-34B
+        --- Full
+            --- Inference time ~21 sec
+            --- VRAM ~70 GB
+
+        --- 8bit (don't use; author says there is a problem right now, 2024-02-08; anecdotally worse results too)
+            --- Inference time ~52 sec
+            --- VRAM ~42 GB
+
+        --- 4bit
+            --- Inference time ~23 sec
+            --- VRAM ~25 GB
+
+    "liuhaotian/llava-v1.6-vicuna-13b" --- Model is 30 GB in size --- Vicuna-13B
+        --- Full
+            --- Inference time ~8 sec
+            --- VRAM ~33 GB
+
+        --- 8bit (don't use; author says there is a problem right now, 2024-02-08; anecdotally worse results too, has lots of ALL CAPS and mistakes)
+            --- Inference time ~32 sec
+            --- VRAM ~23 GB
+
+        --- 4bit
+            --- Inference time ~12 sec
+            --- VRAM ~15 GB
+
+    "liuhaotian/llava-v1.6-vicuna-7b" --- Model is 15 GB in size --- Vicuna-7B
+        --- Full
+            --- Inference time ~7 sec
+            --- VRAM ~20 GB
+
+        --- 8bit (don't use; author says there is a problem right now, 2024-02-08; anecdotally worse results too)
+            --- Inference time ~27 sec
+            --- VRAM ~14 GB
+
+        --- 4bit
+            --- Inference time ~10 sec
+            --- VRAM ~10 GB
+'''
+
+# OCR_Llava = OCRllava()
+# image, caption = OCR_Llava.transcribe_image("path/to/image.jpg", "Describe this image.")
+# print(caption)
+
+# Define the desired data structure for the transcription.
+class Transcription(BaseModel):
+    Transcription: str = Field(description="The transcription of all text in the image.")
+
+class OCRllava:
+    def __init__(self, logger, model_path="liuhaotian/llava-v1.6-34b", load_in_4bit=False, load_in_8bit=False):
+        self.monitor = SystemLoadMonitor(logger)
+
+        # self.model_path = "liuhaotian/llava-v1.6-mistral-7b"
+        # self.model_path = "liuhaotian/llava-v1.6-34b"
+        # self.model_path = "liuhaotian/llava-v1.6-vicuna-13b"
+
+        self.model_path = model_path
+
+        # kwargs = {"device_map": "auto", "load_in_4bit": load_in_4bit, "quantization_config": BitsAndBytesConfig(
+        #     load_in_4bit=load_in_4bit,
+        #     bnb_4bit_compute_dtype=torch.float16,
+        #     bnb_4bit_use_double_quant=load_in_4bit,
+        #     bnb_4bit_quant_type='nf4'
+        # )}
+
+        if "llama-2" in self.model_path.lower():  # this is borrowed from def eval_model(args): in run_llava.py
+            self.conv_mode = "llava_llama_2"
+        elif "mistral" in self.model_path.lower():
+            self.conv_mode = "mistral_instruct"
+        elif "v1.6-34b" in self.model_path.lower():
+            self.conv_mode = "chatml_direct"
+        elif "v1" in self.model_path.lower():
+            self.conv_mode = "llava_v1"
+        elif "mpt" in self.model_path.lower():
+            self.conv_mode = "mpt"
+        else:
+            self.conv_mode = "llava_v0"
+
+        self.tokenizer, self.model, self.image_processor, self.context_len = load_pretrained_model(self.model_path, None,
+                                                                                                   model_name=get_model_name_from_path(self.model_path),
+                                                                                                   load_8bit=load_in_8bit, load_4bit=load_in_4bit)
+
+        # self.model = LlavaLlamaForCausalLM.from_pretrained(self.model_path, low_cpu_mem_usage=True, **kwargs)
+        # self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, use_fast=False)
+        # self.vision_tower = self.model.get_vision_tower()
+        # if not self.vision_tower.is_loaded:
+        #     self.vision_tower.load_model()
+        # self.vision_tower.to(device='cuda')
+        # self.image_processor = self.vision_tower.image_processor
+        self.parser = JsonOutputParser(pydantic_object=Transcription)
+
+    def image_parser(self):
+        sep = ","
+        out = self.image_file.split(sep)
+        return out
+
+    def load_image(self, image_file):
+        if image_file.startswith("http") or image_file.startswith("https"):
+            response = requests.get(image_file)
+            image = Image.open(BytesIO(response.content)).convert("RGB")
+        else:
+            image = Image.open(image_file).convert("RGB")
+        return image
+
+    def load_images(self, image_files):
+        out = []
+        for image_file in image_files:
+            image = self.load_image(image_file)
+            out.append(image)
+        return out
+
+    def combine_json_values(self, data, separator=" "):
+        """
+        Recursively traverses a JSON-like dictionary or list,
+        combining all the values into a single string with a given separator.
+
+        :return: A single string containing all values from the input.
+        """
+        # Base case for strings: return the string directly
+        if isinstance(data, str):
+            return data
+
+        # If the data is a dictionary, iterate through its values
+        elif isinstance(data, dict):
+            combined_string = separator.join(self.combine_json_values(v, separator) for v in data.values())
+
+        # If the data is a list, iterate through its elements
+        elif isinstance(data, list):
+            combined_string = separator.join(self.combine_json_values(item, separator) for item in data)
+
+        # For other data types (e.g., numbers), convert to string directly
+        else:
+            combined_string = str(data)
+
+        return combined_string
+
+    def transcribe_image(self, image_file, prompt, max_new_tokens=512, temperature=0.1, top_p=None, num_beams=1):
+        self.monitor.start_monitoring_usage()
+
+        self.image_file = image_file
+        if image_file.startswith('http') or image_file.startswith('https'):
+            response = requests.get(image_file)
+            image = Image.open(BytesIO(response.content)).convert('RGB')
+        else:
+            image = Image.open(image_file).convert('RGB')
+        disable_torch_init()
+
+        qs = prompt
+        image_token_se = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN
+        if IMAGE_PLACEHOLDER in qs:
+            if self.model.config.mm_use_im_start_end:
+                qs = re.sub(IMAGE_PLACEHOLDER, image_token_se, qs)
+            else:
+                qs = re.sub(IMAGE_PLACEHOLDER, DEFAULT_IMAGE_TOKEN, qs)
+        else:
+            if self.model.config.mm_use_im_start_end:
+                qs = image_token_se + "\n" + qs
+            else:
+                qs = DEFAULT_IMAGE_TOKEN + "\n" + qs
+
+        conv = conv_templates[self.conv_mode].copy()
+        conv.append_message(conv.roles[0], qs)
+        conv.append_message(conv.roles[1], None)
+        prompt = conv.get_prompt()
+
+        image_files = self.image_parser()
+        images = self.load_images(image_files)
+        image_sizes = [x.size for x in images]
+        images_tensor = process_images(
+            images,
+            self.image_processor,
+            self.model.config
+        ).to(self.model.device, dtype=torch.float16)
+
+        input_ids = (
+            tokenizer_image_token(prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt")
+            .unsqueeze(0)
+            .cuda()
+        )
+
+        with torch.inference_mode():
+            output_ids = self.model.generate(
+                input_ids,
+                images=images_tensor,
+                image_sizes=image_sizes,
+                do_sample=True if temperature > 0 else False,
+                temperature=temperature,
+                # top_p=top_p,
+                num_beams=num_beams,
+                max_new_tokens=max_new_tokens,
+                use_cache=True,
+            )
+
+        direct_output = self.tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0].strip()
+
+        # Parse the output to JSON format using the specified schema.
+        try:
+            json_output = self.parser.parse(direct_output)
+        except:
+            json_output = direct_output
+
+        try:
+            str_output = self.combine_json_values(json_output)
+        except:
+            str_output = direct_output
+
+        self.monitor.stop_inference_timer()  # Starts tool timer too
+        usage_report = self.monitor.stop_monitoring_report_usage()
+
+        return image, json_output, direct_output, str_output, usage_report
+
+
+PROMPT_OCR = """I need you to transcribe all of the text in this image. Place the transcribed text into a JSON dictionary with this form {"Transcription": "text"}"""
+PROMPT_ALL = """1. Refactor the unstructured OCR text into a dictionary based on the JSON structure outlined below.
+2. Map the unstructured OCR text to the appropriate JSON key and populate the field given the user-defined rules.
+3. JSON key values are permitted to remain empty strings if the corresponding information is not found in the unstructured OCR text.
+4. Duplicate dictionary fields are not allowed.
+5. Ensure all JSON keys are in camel case.
+6. Ensure new JSON field values follow sentence case capitalization.
+7. Ensure all key-value pairs in the JSON dictionary strictly adhere to the format and data types specified in the template.
+8. Ensure the output JSON string is valid JSON format. It should not have trailing commas or unquoted keys.
+9. Only return a JSON dictionary represented as a string. You should not explain your answer.
+This section provides rules for formatting each JSON value organized by the JSON key.
+{catalogNumber Barcode identifier, typically a number with at least 6 digits, but fewer than 30 digits., order The full scientific name of the order in which the taxon is classified. Order must be capitalized., family The full scientific name of the family in which the taxon is classified. Family must be capitalized., scientificName The scientific name of the taxon including genus, specific epithet, and any lower classifications., scientificNameAuthorship The authorship information for the scientificName formatted according to the conventions of the applicable Darwin Core nomenclaturalCode., genus Taxonomic determination to genus. Genus must be capitalized. If genus is not present use the taxonomic family name followed by the word 'indet'., subgenus The full scientific name of the subgenus in which the taxon is classified. Values should include the genus to avoid homonym confusion., specificEpithet The name of the first or species epithet of the scientificName. Only include the species epithet., infraspecificEpithet The name of the lowest or terminal infraspecific epithet of the scientificName, excluding any rank designation., identifiedBy A comma separated list of names of people, groups, or organizations who assigned the taxon to the subject organism. This is not the specimen collector., recordedBy A comma separated list of names of people, groups, or organizations responsible for observing, recording, collecting, or presenting the original specimen. The primary collector or observer should be listed first., recordNumber An identifier given to the occurrence at the time it was recorded. Often serves as a link between field notes and an occurrence record, such as a specimen collector's number., verbatimEventDate The verbatim original representation of the date and time information for when the specimen was collected. Date of collection exactly as it appears on the label. Do not change the format or correct typos., eventDate Date the specimen was collected formatted as year-month-day, YYYY-MM-DD. If specific components of the date are unknown, they should be replaced with zeros. Examples: '0000-00-00' if the entire date is unknown, 'YYYY-00-00' if only the year is known, and 'YYYY-MM-00' if year and month are known but day is not., habitat A category or description of the habitat in which the specimen collection event occurred., occurrenceRemarks Text describing the specimen's geographic location. Text describing the appearance of the specimen. A statement about the presence or absence of a taxon at the collection location. Text describing the significance of the specimen, such as a specific expedition or notable collection. Description of plant features such as leaf shape, size, color, stem texture, height, flower structure, scent, fruit or seed characteristics, root system type, overall growth habit and form, any notable aroma or secretions, presence of hairs or bristles, and any other distinguishing morphological or physiological characteristics., country The name of the country or major administrative unit in which the specimen was originally collected., stateProvince The name of the next smaller administrative region than country (state, province, canton, department, region, etc.) in which the specimen was originally collected., county The full, unabbreviated name of the next smaller administrative region than stateProvince (county, shire, department, parish etc.) in which the specimen was originally collected., municipality The full, unabbreviated name of the next smaller administrative region than county (city, municipality, etc.) in which the specimen was originally collected., locality Description of geographic location, landscape, landmarks, regional features, nearby places, or any contextual information aiding in pinpointing the exact origin or location of the specimen., degreeOfEstablishment Cultivated plants are intentionally grown by humans. In text descriptions, look for planting dates, garden locations, ornamental, cultivar names, garden, or farm to indicate cultivated plant. Use either - unknown or cultivated., decimalLatitude Latitude decimal coordinate. Correct and convert the verbatim location coordinates to conform with the decimal degrees GPS coordinate format., decimalLongitude Longitude decimal coordinate. Correct and convert the verbatim location coordinates to conform with the decimal degrees GPS coordinate format., verbatimCoordinates Verbatim location coordinates as they appear on the label. Do not convert formats. Possible coordinate types include [Lat, Long, UTM, TRS]., minimumElevationInMeters Minimum elevation or altitude in meters. Only if units are explicit then convert from feet ('ft' or 'ft.' or 'feet') to meters ('m' or 'm.' or 'meters'). Round to integer., maximumElevationInMeters Maximum elevation or altitude in meters. If only one elevation is present, then max_elevation should be set to the null_value. Only if units are explicit then convert from feet ('ft' or 'ft.' or 'feet') to meters ('m' or 'm.' or 'meters'). Round to integer.}
+Please populate the following JSON dictionary based on the rules and the unformatted OCR text
+{
+catalogNumber ,
+order ,
+family ,
+scientificName ,
+scientificNameAuthorship ,
+genus ,
+subgenus ,
+specificEpithet ,
+infraspecificEpithet ,
+identifiedBy ,
+recordedBy ,
+recordNumber ,
+verbatimEventDate ,
+eventDate ,
+habitat ,
+occurrenceRemarks ,
+country ,
+stateProvince ,
+county ,
+municipality ,
+locality ,
+degreeOfEstablishment ,
+decimalLatitude ,
+decimalLongitude ,
+verbatimCoordinates ,
+minimumElevationInMeters ,
+maximumElevationInMeters
+}
+"""
+if __name__ == '__main__':
+    logger = logging.getLogger('LLaVA')
+    logger.setLevel(logging.DEBUG)
+
+    OCR_Llava = OCRllava(logger)
+    image, json_output, direct_output, str_output, usage_report = OCR_Llava.transcribe_image("/home/brlab/Dropbox/VoucherVision/demo/demo_images/MICH_16205594_Poaceae_Jouvea_pilosa.jpg",
+                                                                                             PROMPT_OCR)
+    print('json_output')
+    print(json_output)
+    print('direct_output')
+    print(direct_output)
+    print('str_output')
+    print(str_output)
+    print('usage_report')
+    print(usage_report)
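
Note: transcribe_image() hands whatever the JsonOutputParser returns to combine_json_values(), so the flattening is worth seeing in isolation. A self-contained version of the same recursion, extracted from the class for illustration:

# Standalone sketch of combine_json_values(); mirrors the method above.
def combine_json_values(data, separator=" "):
    if isinstance(data, str):
        return data                      # base case: strings pass through
    if isinstance(data, dict):
        return separator.join(combine_json_values(v, separator) for v in data.values())
    if isinstance(data, list):
        return separator.join(combine_json_values(item, separator) for item in data)
    return str(data)                     # numbers and other scalars

# A parsed LLaVA response collapses to a flat string for the downstream OCR field:
print(combine_json_values({"Transcription": {"header": "MICH", "lines": ["Jouvea", "pilosa"]}}))
# -> MICH Jouvea pilosa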
vouchervision/VoucherVision_Config_Builder.py CHANGED
@@ -37,6 +37,10 @@ def build_VV_config(loaded_cfg=None):
 
     do_use_trOCR = False
     OCR_option = 'hand'
+    OCR_option_llava = 'llava-v1.6-mistral-7b' # "llava-v1.6-mistral-7b", "llava-v1.6-34b", "llava-v1.6-vicuna-13b", "llava-v1.6-vicuna-7b",
+    OCR_option_llava_bit = 'full' # full or 4bit
+    double_OCR = False
+
     check_for_illegal_filenames = False
 
     LLM_version_user = 'Azure GPT 3.5 Instruct' #'Azure GPT 4 Turbo 1106-preview'
@@ -47,6 +51,9 @@ def build_VV_config(loaded_cfg=None):
     batch_size = 500
     num_workers = 8
 
+    skip_vertical = False
+    pdf_conversion_dpi = 100
+
     path_domain_knowledge = os.path.join(dir_home,'domain_knowledge','SLTP_UM_AllAsiaMinimalInRegion.xlsx')
     embeddings_database_name = os.path.splitext(os.path.basename(path_domain_knowledge))[0]
 
@@ -58,8 +65,8 @@ def build_VV_config(loaded_cfg=None):
     return assemble_config(dir_home, run_name, dir_images_local,dir_output,
         prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,num_workers,
         path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
-        prompt_version, do_create_OCR_helper_image, do_use_trOCR, OCR_option, save_cropped_annotations,
-        check_for_illegal_filenames, use_domain_knowledge=False)
+        prompt_version, do_create_OCR_helper_image, do_use_trOCR, OCR_option, OCR_option_llava, OCR_option_llava_bit, double_OCR, save_cropped_annotations,
+        check_for_illegal_filenames, skip_vertical, pdf_conversion_dpi, use_domain_knowledge=False)
     else:
         dir_home = os.path.dirname(os.path.dirname(__file__))
         run_name = loaded_cfg['leafmachine']['project']['run_name']
@@ -74,6 +81,11 @@ def build_VV_config(loaded_cfg=None):
 
     do_use_trOCR = loaded_cfg['leafmachine']['project']['do_use_trOCR']
     OCR_option = loaded_cfg['leafmachine']['project']['OCR_option']
+    OCR_option_llava = loaded_cfg['leafmachine']['project']['OCR_option_llava']
+    OCR_option_llava_bit = loaded_cfg['leafmachine']['project']['OCR_option_llava_bit']
+    double_OCR = loaded_cfg['leafmachine']['project']['double_OCR']
+
+    pdf_conversion_dpi = loaded_cfg['leafmachine']['project']['pdf_conversion_dpi']
 
     LLM_version_user = loaded_cfg['leafmachine']['LLM_version']
     prompt_version = loaded_cfg['leafmachine']['project']['prompt_version']
@@ -88,19 +100,20 @@ def build_VV_config(loaded_cfg=None):
 
     save_cropped_annotations = loaded_cfg['leafmachine']['cropped_components']['save_cropped_annotations']
     check_for_illegal_filenames = loaded_cfg['leafmachine']['do']['check_for_illegal_filenames']
+    skip_vertical = loaded_cfg['leafmachine']['do']['skip_vertical']
 
     return assemble_config(dir_home, run_name, dir_images_local,dir_output,
         prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,num_workers,
         path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
-        prompt_version, do_create_OCR_helper_image, do_use_trOCR, OCR_option, save_cropped_annotations,
-        check_for_illegal_filenames, use_domain_knowledge=False)
+        prompt_version, do_create_OCR_helper_image, do_use_trOCR, OCR_option, OCR_option_llava, OCR_option_llava_bit, double_OCR, save_cropped_annotations,
+        check_for_illegal_filenames, skip_vertical, pdf_conversion_dpi, use_domain_knowledge=False)
 
 
 def assemble_config(dir_home, run_name, dir_images_local,dir_output,
     prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,num_workers,
     path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
-    prompt_version, do_create_OCR_helper_image_user, do_use_trOCR, OCR_option, save_cropped_annotations,
-    check_for_illegal_filenames, use_domain_knowledge=False):
+    prompt_version, do_create_OCR_helper_image_user, do_use_trOCR, OCR_option, OCR_option_llava, OCR_option_llava_bit, double_OCR, save_cropped_annotations,
+    check_for_illegal_filenames, skip_vertical, pdf_conversion_dpi, use_domain_knowledge=False):
 
 
     # Initialize the base structure
@@ -112,6 +125,7 @@ def assemble_config(dir_home, run_name, dir_images_local,dir_output,
     do_section = {
         'check_for_illegal_filenames': check_for_illegal_filenames,
         'check_for_corrupt_images_make_vertical': True,
+        'skip_vertical': skip_vertical,
     }
 
     print_section = {
@@ -144,6 +158,10 @@ def assemble_config(dir_home, run_name, dir_images_local,dir_output,
         'delete_temps_keep_VVE': False,
         'do_use_trOCR': do_use_trOCR,
        'OCR_option': OCR_option,
+        'OCR_option_llava': OCR_option_llava,
+        'OCR_option_llava_bit': OCR_option_llava_bit,
+        'double_OCR': double_OCR,
+        'pdf_conversion_dpi': pdf_conversion_dpi,
     }
 
     modules_section = {
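
Note: the new options all flow through assemble_config() into the nested config dict. A sketch of where they land, using the defaults from build_VV_config() above (a subset of the full structure):

# Where the new keys sit in the assembled config (values are the defaults above).
project_section = {
    'do_use_trOCR': False,
    'OCR_option': 'hand',
    'OCR_option_llava': 'llava-v1.6-mistral-7b',
    'OCR_option_llava_bit': 'full',   # 'full' or '4bit'
    'double_OCR': False,
    'pdf_conversion_dpi': 100,
}
do_section = {
    'check_for_illegal_filenames': False,
    'check_for_corrupt_images_make_vertical': True,
    'skip_vertical': False,
}
cfg = {'leafmachine': {'project': project_section, 'do': do_section}}
print(cfg['leafmachine']['project']['OCR_option_llava'])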
vouchervision/data_project.py CHANGED
@@ -12,6 +12,19 @@ from vouchervision.download_from_GBIF_all_images_in_file import download_all_ima
 from PIL import Image
 from tqdm import tqdm
 from pathlib import Path
+import fitz
+
+def convert_pdf_to_jpg(source_pdf, destination_dir, dpi=100):
+    doc = fitz.open(source_pdf)
+    for page_num in range(len(doc)):
+        page = doc.load_page(page_num)  # Load the current page
+        pix = page.get_pixmap(dpi=dpi)  # Render page to an image
+        output_filename = f"{os.path.splitext(os.path.basename(source_pdf))[0]}__{10000 + page_num + 1}.jpg"
+        output_filepath = os.path.join(destination_dir, output_filename)
+        pix.save(output_filepath)  # Save the image
+    length_doc = len(doc)
+    doc.close()
+    return length_doc
 
 @dataclass
 class Project_Info():
@@ -39,6 +52,7 @@ class Project_Info():
         self.Dirs = Dirs
         logger.name = 'Project Info'
         logger.info("Gathering Images and Image Metadata")
+        self.logger = logger
 
         self.batch_size = cfg['leafmachine']['project']['batch_size']
 
@@ -90,15 +104,28 @@ class Project_Info():
     def remove_non_numbers(self, s):
         return ''.join([char for char in s if char.isdigit()])
 
+    # def copy_images_to_project_dir(self, dir_images, Dirs):
+    #     n_total = len(os.listdir(dir_images))
+    #     for file in tqdm(os.listdir(dir_images), desc=f'{bcolors.HEADER} Copying images to working directory{bcolors.ENDC}',colour="white",position=0,total = n_total):
+    #         # Copy og image to new dir
+    #         # Copied image will be used for all downstream applications
+    #         source = os.path.join(dir_images, file)
+    #         destination = os.path.join(Dirs.save_original, file)
+    #         shutil.copy(source, destination)
     def copy_images_to_project_dir(self, dir_images, Dirs):
         n_total = len(os.listdir(dir_images))
-        for file in tqdm(os.listdir(dir_images), desc=f'{bcolors.HEADER} Copying images to working directory{bcolors.ENDC}',colour="white",position=0,total = n_total):
-            # Copy og image to new dir
-            # Copied image will be used for all downstream applications
+        for file in tqdm(os.listdir(dir_images), desc=f'{bcolors.HEADER} Copying images to working directory{bcolors.ENDC}', colour="white", position=0, total=n_total):
             source = os.path.join(dir_images, file)
-            destination = os.path.join(Dirs.save_original, file)
-            shutil.copy(source, destination)
-
+            # Check if file is a PDF
+            if file.lower().endswith('.pdf'):
+                # Convert PDF pages to JPG images
+                n_pages = convert_pdf_to_jpg(source, Dirs.save_original)
+                self.logger.info(f"Converted {n_pages} pages to JPG from PDF: {file}")
+            else:
+                # Copy non-PDF files directly
+                destination = os.path.join(Dirs.save_original, file)
+                shutil.copy(source, destination)
+
     def make_file_names_custom(self, dir_images, cfg, Dirs):
         n_total = len(os.listdir(dir_images))
         for file in tqdm(os.listdir(dir_images), desc=f'{bcolors.HEADER} Creating Catalog Number from file name{bcolors.ENDC}',colour="green",position=0,total = n_total):
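
Note: the 10000 + page_num + 1 offset gives fixed-width page suffixes (__10001, __10002, ...) so converted pages sort correctly next to single-image files. A hypothetical call to the new helper (the paths here are placeholders, and the import assumes the module path above):

# Hypothetical usage of convert_pdf_to_jpg(); requires PyMuPDF (import fitz).
from vouchervision.data_project import convert_pdf_to_jpg

n_pages = convert_pdf_to_jpg("scans/specimen_batch.pdf", "uploads", dpi=100)
print(f"Wrote {n_pages} JPG pages")  # e.g., specimen_batch__10001.jpg, ...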
vouchervision/general_utils.py CHANGED
@@ -437,6 +437,7 @@ def split_into_batches(Project, logger, cfg):
     return Project, n_batches, m
 
 def make_images_in_dir_vertical(dir_images_unprocessed, cfg):
+    skip_vertical = cfg['leafmachine']['do']['skip_vertical']
    if cfg['leafmachine']['do']['check_for_corrupt_images_make_vertical']:
         n_rotate = 0
         n_corrupt = 0
@@ -445,10 +446,11 @@ def make_images_in_dir_vertical(dir_images_unprocessed, cfg):
             if image_name_jpg.endswith((".jpg",".JPG",".jpeg",".JPEG")):
                 try:
                     image = cv2.imread(os.path.join(dir_images_unprocessed, image_name_jpg))
-                    h, w, img_c = image.shape
-                    image, img_h, img_w, did_rotate = make_image_vertical(image, h, w, do_rotate_180=False)
-                    if did_rotate:
-                        n_rotate += 1
+                    if not skip_vertical:
+                        h, w, img_c = image.shape
+                        image, img_h, img_w, did_rotate = make_image_vertical(image, h, w, do_rotate_180=False)
+                        if did_rotate:
+                            n_rotate += 1
                     cv2.imwrite(os.path.join(dir_images_unprocessed,image_name_jpg), image)
                 except:
                     n_corrupt +=1
@@ -457,10 +459,11 @@ def make_images_in_dir_vertical(dir_images_unprocessed, cfg):
             elif image_name_jpg.endswith((".tiff",".tif",".png",".PNG",".TIFF",".TIF",".jp2",".JP2",".bmp",".BMP",".dib",".DIB")):
                 try:
                     image = cv2.imread(os.path.join(dir_images_unprocessed, image_name_jpg))
-                    h, w, img_c = image.shape
-                    image, img_h, img_w, did_rotate = make_image_vertical(image, h, w, do_rotate_180=False)
-                    if did_rotate:
-                        n_rotate += 1
+                    if not skip_vertical:
+                        h, w, img_c = image.shape
+                        image, img_h, img_w, did_rotate = make_image_vertical(image, h, w, do_rotate_180=False)
+                        if did_rotate:
+                            n_rotate += 1
                     image_name_jpg = '.'.join([image_name_jpg.split('.')[0], 'jpg'])
                     cv2.imwrite(os.path.join(dir_images_unprocessed,image_name_jpg), image)
                 except:
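
Note: with skip_vertical set, make_images_in_dir_vertical() skips the portrait rotation but still re-reads and re-writes every file, so the corrupt-image check is preserved. A minimal sketch of the gate (make_image_vertical here is a stand-in for the real helper):

import numpy as np

def make_image_vertical(image, h, w, do_rotate_180=False):
    # Stand-in: rotate 90 degrees when the image is wider than tall.
    if w > h:
        return np.rot90(image), w, h, True
    return image, h, w, False

def process(image, skip_vertical):
    did_rotate = False
    if not skip_vertical:
        h, w = image.shape[:2]
        image, h, w, did_rotate = make_image_vertical(image, h, w)
    return image, did_rotate

img = np.zeros((100, 200, 3), dtype=np.uint8)  # landscape dummy image
print(process(img, skip_vertical=False)[1])    # True  (rotated)
print(process(img, skip_vertical=True)[1])     # False (left as-is)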
vouchervision/llava_test.py ADDED
@@ -0,0 +1,34 @@
+from LLaVA.llava.model.builder import load_pretrained_model
+from LLaVA.llava.mm_utils import get_model_name_from_path
+from LLaVA.llava.eval.run_llava import eval_model
+
+# model_path = "liuhaotian/llava-v1.5-7b"
+
+# tokenizer, model, image_processor, context_len = load_pretrained_model(
+#     model_path=model_path,
+#     model_base=None,
+#     model_name=get_model_name_from_path(model_path)
+# )
+
+# model_path = "liuhaotian/llava-v1.5-7b"
+# model_path = "liuhaotian/llava-v1.6-mistral-7b"
+model_path = "liuhaotian/llava-v1.6-34b"
+prompt = """I need you to transcribe all of the text in this image. Place the transcribed text into a JSON dictionary with this form {"Transcription": "text"}"""
+# image_file = "https://llava-vl.github.io/static/images/view.jpg"
+image_file = "/home/brlab/Dropbox/VoucherVision/demo/demo_images/MICH_16205594_Poaceae_Jouvea_pilosa.jpg"
+args = type('Args', (), {
+    "model_path": model_path,
+    "model_base": None,
+    "model_name": get_model_name_from_path(model_path),
+    "query": prompt,
+    "conv_mode": None,
+    "image_file": image_file,
+    "sep": ",",
+    "temperature": 0,
+    "top_p": None,
+    "num_beams": 1,
+    "max_new_tokens": 512,
+    # "load_8_bit": True,
+})()
+
+eval_model(args)
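
Note: the args object above is built with type('Args', (), {...})(), a throwaway class whose attributes mimic the argparse.Namespace that eval_model() expects. types.SimpleNamespace does the same job more readably; a sketch with illustrative values:

from types import SimpleNamespace

args = SimpleNamespace(
    model_path="liuhaotian/llava-v1.6-34b",
    model_base=None,
    model_name="llava-v1.6-34b",  # normally get_model_name_from_path(model_path)
    query="Transcribe all of the text in this image.",
    conv_mode=None,
    image_file="demo.jpg",        # placeholder path
    sep=",",
    temperature=0,
    top_p=None,
    num_beams=1,
    max_new_tokens=512,
)
print(args.model_path, args.max_new_tokens)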
vouchervision/utils_LLM.py CHANGED
@@ -49,7 +49,7 @@ class SystemLoadMonitor():
     def __init__(self, logger) -> None:
         self.monitoring_thread = None
         self.logger = logger
-        self.gpu_usage = {'max_cpu_usage': 0, 'max_load': 0, 'max_vram_usage': 0, "max_ram_usage": 0, 'monitoring': True}
+        self.gpu_usage = {'max_cpu_usage': 0, 'max_load': 0, 'max_vram_usage': 0, "max_ram_usage": 0, 'n_gpus': 0, 'monitoring': True}
         self.start_time = None
         self.tool_start_time = None
         self.has_GPU = torch.cuda.is_available()
@@ -71,11 +71,17 @@ class SystemLoadMonitor():
             # GPU monitoring
             if self.has_GPU:
                 GPUs = GPUtil.getGPUs()
+                self.gpu_usage['n_gpus'] = len(GPUs)  # Count the number of GPUs
+                total_load = 0
+                total_memory_usage_gb = 0
                 for gpu in GPUs:
-                    self.gpu_usage['max_load'] = max(self.gpu_usage['max_load'], gpu.load)
-                    # Convert memory usage to GB
-                    memory_usage_gb = gpu.memoryUsed / 1024.0
-                    self.gpu_usage['max_vram_usage'] = max(self.gpu_usage.get('max_vram_usage', 0), memory_usage_gb)
+                    total_load += gpu.load
+                    total_memory_usage_gb += gpu.memoryUsed / 1024.0
+
+                if self.gpu_usage['n_gpus'] > 0:  # Avoid division by zero
+                    # Track the average load and the total memory usage across all GPUs
+                    self.gpu_usage['max_load'] = max(self.gpu_usage['max_load'], total_load / self.gpu_usage['n_gpus'])
+                    self.gpu_usage['max_vram_usage'] = max(self.gpu_usage['max_vram_usage'], total_memory_usage_gb)
 
             # RAM monitoring
             ram_usage = psutil.virtual_memory().used / (1024.0 ** 3)  # Get RAM usage in GB
@@ -94,46 +100,91 @@ class SystemLoadMonitor():
             return datetime_iso
 
     def stop_monitoring_report_usage(self):
-        report = {}
-
         self.gpu_usage['monitoring'] = False
         self.monitoring_thread.join()
-        # Calculate tool time by checking if tool_start_time is set
-        if self.tool_start_time:
-            tool_time = time.time() - self.tool_start_time
-        else:
-            tool_time = 0
-
-        report = {'inference_time_s': str(round(self.inference_time,2)),
-                  'tool_time_s': str(round(tool_time, 2)),
-                  'max_cpu': str(round(self.gpu_usage['max_cpu_usage'],2)),
-                  'max_ram_gb': str(round(self.gpu_usage['max_ram_usage'],2)),
-                  'current_time': self.get_current_datetime(),
-                  }
+        tool_time = time.time() - self.tool_start_time if self.tool_start_time else 0
+
+        num_gpus, gpu_dict, total_vram_gb, capability_score = check_system_gpus()
+
+        report = {
+            'inference_time_s': str(round(self.inference_time, 2)),
+            'tool_time_s': str(round(tool_time, 2)),
+            'max_cpu': str(round(self.gpu_usage['max_cpu_usage'], 2)),
+            'max_ram_gb': str(round(self.gpu_usage['max_ram_usage'], 2)),
+            'current_time': self.get_current_datetime(),
+            'n_gpus': self.gpu_usage['n_gpus'],
+            'total_gpu_vram_gb': total_vram_gb,
+            'capability_score': capability_score,
+        }
+
         self.logger.info(f"Inference Time: {round(self.inference_time,2)} seconds")
         self.logger.info(f"Tool Time: {round(tool_time,2)} seconds")
-
         self.logger.info(f"Max CPU Usage: {round(self.gpu_usage['max_cpu_usage'],2)}%")
         self.logger.info(f"Max RAM Usage: {round(self.gpu_usage['max_ram_usage'],2)}GB")
-
         if self.has_GPU:
-            report.update({'max_gpu_load': str(round(self.gpu_usage['max_load']*100,2))})
-            report.update({'max_gpu_vram_gb': str(round(self.gpu_usage['max_vram_usage'],2))})
-
-            self.logger.info(f"Max GPU Load: {round(self.gpu_usage['max_load']*100,2)}%")
-            self.logger.info(f"Max GPU Memory Usage: {round(self.gpu_usage['max_vram_usage'],2)}GB")
+            report.update({'max_gpu_load': str(round(self.gpu_usage['max_load'] * 100, 2))})
+            report.update({'max_gpu_vram_gb': str(round(self.gpu_usage['max_vram_usage'], 2))})
+            self.logger.info(f"Max GPU Load: {round(self.gpu_usage['max_load'] * 100, 2)}%")
+            self.logger.info(f"Max GPU Memory Usage: {round(self.gpu_usage['max_vram_usage'], 2)}GB")
         else:
-            report.update({'max_gpu_load': str(0)})
-            report.update({'max_gpu_vram_gb': str(0)})
+            report.update({'max_gpu_load': '0'})
+            report.update({'max_gpu_vram_gb': '0'})
 
         return report
 
 
+def check_system_gpus():
+    print(f"Torch CUDA: {torch.cuda.is_available()}")
+    # if not torch.cuda.is_available():
+    #     return 0, {}, 0, "no_gpu"
+
+    GPUs = GPUtil.getGPUs()
+    num_gpus = len(GPUs)
+    gpu_dict = {}
+    total_vram = 0
+
+    for i, gpu in enumerate(GPUs):
+        gpu_vram = gpu.memoryTotal  # VRAM in MB
+        gpu_dict[f"GPU_{i}"] = f"{gpu_vram / 1024} GB"  # Convert to GB
+        total_vram += gpu_vram
+
+    total_vram_gb = total_vram / 1024  # Convert total VRAM to GB
+
+    capability_score_map = {
+        "no_gpu": 0,
+        "class_8GB": 10,
+        "class_12GB": 14,
+        "class_16GB": 18,
+        "class_24GB": 26,
+        "class_48GB": 50,
+        "class_96GB": 100,
+        "class_96GBplus": float('inf'),  # Use infinity to represent any value greater than 96GB
+    }
+
+    # Determine the capability score based on the total VRAM
+    capability_score = "no_gpu"
+    for score, vram in capability_score_map.items():
+        if total_vram_gb <= vram:
+            capability_score = score
+            break
+    else:
+        capability_score = "class_max"
+
+    return num_gpus, gpu_dict, total_vram_gb, capability_score
+
+
+if __name__ == '__main__':
+    num_gpus, gpu_dict, total_vram_gb, capability_score = check_system_gpus()
+    print(f"Number of GPUs: {num_gpus}")
+    print(f"GPU Details: {gpu_dict}")
+    print(f"Total VRAM: {total_vram_gb} GB")
+    print(f"Capability Score: {capability_score}")
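
Note: in check_system_gpus() the capability_score_map values act as upper VRAM thresholds (with headroom, so an 8.5 GB card still lands in class_8GB because the threshold is 10), and the float('inf') sentinel makes the for-else "class_max" branch unreachable. A standalone sketch of the bucketing:

# Sketch of the VRAM bucketing in check_system_gpus().
capability_score_map = {
    "no_gpu": 0, "class_8GB": 10, "class_12GB": 14, "class_16GB": 18,
    "class_24GB": 26, "class_48GB": 50, "class_96GB": 100,
    "class_96GBplus": float('inf'),
}

def bucket(total_vram_gb):
    for score, vram in capability_score_map.items():
        if total_vram_gb <= vram:
            return score
    return "class_max"  # unreachable thanks to the float('inf') sentinel

for vram in (0, 8, 24, 48, 80, 200):
    print(vram, "->", bucket(vram))
# 0 -> no_gpu, 8 -> class_8GB, 24 -> class_24GB,
# 48 -> class_48GB, 80 -> class_96GB, 200 -> class_96GBplus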
vouchervision/utils_LLM_JSON_validation.py CHANGED
@@ -11,7 +11,7 @@ def validate_and_align_JSON_keys_with_template(data, JSON_dict_structure):
         if value is None:
             data[key] = ''
         elif isinstance(value, str):
-            if value.lower() in ['unknown', 'not provided', 'missing', 'na', 'none', 'n/a', 'null',
+            if value.lower() in ['unknown', 'not provided', 'missing', 'na', 'none', 'n/a', 'null', 'unspecified',
                                  'not provided in the text', 'not found in the text',
                                  'not in the text', 'not provided', 'not found',
                                  'not provided in the ocr', 'not found in the ocr',
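
Note: this growing alias list normalizes LLM filler values to empty strings; 'unspecified' is the newly added alias. A simplified standalone version of the normalization:

NULL_ALIASES = {'unknown', 'not provided', 'missing', 'na', 'none', 'n/a', 'null', 'unspecified'}

def normalize(data):
    # Replace None and known "no data" strings with empty strings, in place.
    for key, value in data.items():
        if value is None or (isinstance(value, str) and value.lower() in NULL_ALIASES):
            data[key] = ''
    return data

print(normalize({'genus': 'Jouvea', 'county': 'Unspecified', 'habitat': None}))
# -> {'genus': 'Jouvea', 'county': '', 'habitat': ''}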
vouchervision/utils_VoucherVision.py CHANGED
@@ -5,10 +5,8 @@ from openpyxl import Workbook, load_workbook
5
  import vertexai
6
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
7
  from langchain_openai import AzureChatOpenAI
8
- from OCR_google_cloud_vision import OCRGoogle
9
- # import google.generativeai as genai
10
  from google.oauth2 import service_account
11
- # from googleapiclient.discovery import build
12
 
13
  from vouchervision.LLM_OpenAI import OpenAIHandler
14
  from vouchervision.LLM_GooglePalm2 import GooglePalm2Handler
@@ -20,6 +18,7 @@ from vouchervision.utils_LLM import remove_colons_and_double_apostrophes
20
  from vouchervision.prompt_catalog import PromptCatalog
21
  from vouchervision.model_maps import ModelMaps
22
  from vouchervision.general_utils import get_cfg_from_full_path
 
23
 
24
  '''
25
  * For the prefix_removal, the image names have 'MICH-V-' prior to the barcode, so that is used for matching
@@ -44,9 +43,11 @@ class VoucherVision():
44
  self.prompt_version = None
45
  self.is_hf = is_hf
46
 
47
- # self.trOCR_model_version = "microsoft/trocr-large-handwritten"
48
- self.trOCR_model_version = "microsoft/trocr-base-handwritten"
49
- # self.trOCR_model_version = "dh-unibe/trocr-medieval-escriptmask"
 
 
50
  self.trOCR_processor = None
51
  self.trOCR_model = None
52
 
@@ -77,12 +78,12 @@ class VoucherVision():
77
  "GEO_decimal_long","GEO_city", "GEO_county", "GEO_state",
78
  "GEO_state_code", "GEO_country", "GEO_country_code", "GEO_continent",]
79
 
80
- self.usage_headers = ["current_time", "inference_time_s", "tool_time_s","max_cpu", "max_ram_gb", "max_gpu_load", "max_gpu_vram_gb",]
81
 
82
  self.wfo_headers = ["WFO_override_OCR", "WFO_exact_match","WFO_exact_match_name","WFO_best_match","WFO_candidate_names","WFO_placement"]
83
  self.wfo_headers_no_lists = ["WFO_override_OCR", "WFO_exact_match","WFO_exact_match_name","WFO_best_match","WFO_placement"]
84
 
85
- self.utility_headers = ["filename"] + self.wfo_headers + self.geo_headers + self.usage_headers + ["prompt", "LLM", "tokens_in", "tokens_out", "path_to_crop","path_to_original","path_to_content","path_to_helper",]
86
  # "WFO_override_OCR", "WFO_exact_match","WFO_exact_match_name","WFO_best_match","WFO_candidate_names","WFO_placement",
87
 
88
  # "GEO_override_OCR", "GEO_method", "GEO_formatted_full_string", "GEO_decimal_lat",
@@ -117,8 +118,8 @@ class VoucherVision():
117
  lgr = logging.getLogger('transformers')
118
  lgr.setLevel(logging.ERROR)
119
 
120
- self.trOCR_processor = TrOCRProcessor.from_pretrained(self.trOCR_model_version)
121
- self.trOCR_model = VisionEncoderDecoderModel.from_pretrained(self.trOCR_model_version)
122
 
123
  # Check for GPU availability
124
  self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -297,7 +298,7 @@ class VoucherVision():
297
  break
298
 
299
 
300
- def add_data_to_excel_from_response(self, path_transcription, response, WFO_record, GEO_record, usage_report, MODEL_NAME_FORMATTED, filename_without_extension, path_to_crop, path_to_content, path_to_helper, nt_in, nt_out):
301
 
302
 
303
  wb = openpyxl.load_workbook(path_transcription)
@@ -364,6 +365,8 @@ class VoucherVision():
364
  sheet.cell(row=next_row, column=i, value=filename_without_extension)
365
  elif header.value == "prompt":
366
  sheet.cell(row=next_row, column=i, value=os.path.basename(self.path_custom_prompts))
 
 
367
 
368
  # "WFO_exact_match","WFO_exact_match_name","WFO_best_match","WFO_candidate_names","WFO_placement"
369
  elif header.value in self.wfo_headers_no_lists:
@@ -613,12 +616,12 @@ class VoucherVision():
613
  ##################################################################################################################################
614
  ################################################## OCR ##################################################################
615
  ##################################################################################################################################
616
- def perform_OCR_and_save_results(self, image_index, jpg_file_path_OCR_helper, txt_file_path_OCR, txt_file_path_OCR_bounds):
617
  self.logger.info(f'Working on {image_index + 1}/{len(self.img_paths)} --- Starting OCR')
618
  # self.OCR - None
619
 
620
  ### Process_image() runs the OCR for text, handwriting, trOCR AND creates the overlay image
621
- ocr_google = OCRGoogle(self.is_hf, self.path_to_crop, self.cfg, self.trOCR_model_version, self.trOCR_model, self.trOCR_processor, self.device)
622
  ocr_google.process_image(self.do_create_OCR_helper_image, self.logger)
623
  self.OCR = ocr_google.OCR
624
 
@@ -682,7 +685,7 @@ class VoucherVision():
682
 
683
  filename_without_extension, txt_file_path, txt_file_path_OCR, txt_file_path_OCR_bounds, jpg_file_path_OCR_helper, json_file_path_wiki, txt_file_path_ind_prompt = paths
684
  json_report.set_text(text_main='Starting OCR')
685
- self.perform_OCR_and_save_results(i, jpg_file_path_OCR_helper, txt_file_path_OCR, txt_file_path_OCR_bounds)
686
  json_report.set_text(text_main='Finished OCR')
687
 
688
  if not self.OCR:
@@ -797,10 +800,10 @@ class VoucherVision():
797
  filename_without_extension, txt_file_path, txt_file_path_OCR, txt_file_path_OCR_bounds, jpg_file_path_OCR_helper, json_file_path_wiki, txt_file_path_ind_prompt = paths
798
  # Saving the JSON and XLSX files with the response and updating the final JSON response
799
  if response_candidate is not None:
800
- final_JSON_response_updated = self.save_json_and_xlsx(response_candidate, WFO_record, GEO_record, usage_report, MODEL_NAME_FORMATTED, filename_without_extension, path_to_crop, txt_file_path, jpg_file_path_OCR_helper, nt_in, nt_out)
801
  return final_JSON_response_updated, WFO_record, GEO_record
802
  else:
803
- final_JSON_response_updated = self.save_json_and_xlsx(response_candidate, WFO_record, GEO_record, usage_report, MODEL_NAME_FORMATTED, filename_without_extension, path_to_crop, txt_file_path, jpg_file_path_OCR_helper, nt_in, nt_out)
804
  return final_JSON_response_updated, WFO_record, GEO_record
805
 
806
 
@@ -836,7 +839,7 @@ class VoucherVision():
836
  return filename_without_extension, txt_file_path, txt_file_path_OCR, txt_file_path_OCR_bounds, jpg_file_path_OCR_helper, json_file_path_wiki, txt_file_path_ind_prompt
837
 
838
 
839
- def save_json_and_xlsx(self, response, WFO_record, GEO_record, usage_report, MODEL_NAME_FORMATTED, filename_without_extension, path_to_crop, txt_file_path, jpg_file_path_OCR_helper, nt_in, nt_out):
840
  if response is None:
841
  response = self.JSON_dict_structure
842
  # Insert 'filename' as the first key
@@ -845,14 +848,14 @@ class VoucherVision():
845
 
846
  # Then add the null info to the spreadsheet
847
  response_null = self.create_null_row(filename_without_extension, path_to_crop, txt_file_path, jpg_file_path_OCR_helper)
848
- self.add_data_to_excel_from_response(self.path_transcription, response_null, WFO_record, GEO_record, usage_report, MODEL_NAME_FORMATTED, filename_without_extension, path_to_crop, txt_file_path, jpg_file_path_OCR_helper, nt_in=0, nt_out=0)
849
 
850
  ### Set completed JSON
851
  else:
852
  response = self.clean_catalog_number(response, filename_without_extension)
853
  self.write_json_to_file(txt_file_path, response)
854
  # add to the xlsx file
855
- self.add_data_to_excel_from_response(self.path_transcription, response, WFO_record, GEO_record, usage_report, MODEL_NAME_FORMATTED, filename_without_extension, path_to_crop, txt_file_path, jpg_file_path_OCR_helper, nt_in, nt_out)
856
  return response
857
 
858
 
 
5
  import vertexai
6
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
7
  from langchain_openai import AzureChatOpenAI
 
 
8
  from google.oauth2 import service_account
9
+ from transformers import AutoTokenizer, AutoModel
10
 
11
  from vouchervision.LLM_OpenAI import OpenAIHandler
12
  from vouchervision.LLM_GooglePalm2 import GooglePalm2Handler
 
18
  from vouchervision.prompt_catalog import PromptCatalog
19
  from vouchervision.model_maps import ModelMaps
20
  from vouchervision.general_utils import get_cfg_from_full_path
21
+ from vouchervision.OCR_google_cloud_vision import OCREngine
22
 
23
  '''
24
  * For the prefix_removal, the image names have 'MICH-V-' prior to the barcode, so that is used for matching
 
43
  self.prompt_version = None
44
  self.is_hf = is_hf
45
 
46
+ self.trOCR_model_version = "microsoft/trocr-large-handwritten"
47
+ # self.trOCR_model_version = "microsoft/trocr-base-handwritten"
48
+ # self.trOCR_model_version = "dh-unibe/trocr-medieval-escriptmask" # NOPE
49
+ # self.trOCR_model_version = "dh-unibe/trocr-kurrent" # NOPE
50
+ # self.trOCR_model_version = "DunnBC22/trocr-base-handwritten-OCR-handwriting_recognition_v2" # NOPE
51
  self.trOCR_processor = None
52
  self.trOCR_model = None
53
 
 
78
  "GEO_decimal_long","GEO_city", "GEO_county", "GEO_state",
79
  "GEO_state_code", "GEO_country", "GEO_country_code", "GEO_continent",]
80
 
81
+ self.usage_headers = ["current_time", "inference_time_s", "tool_time_s","max_cpu", "max_ram_gb", "n_gpus", "max_gpu_load", "max_gpu_vram_gb","total_gpu_vram_gb","capability_score",]
82
 
83
  self.wfo_headers = ["WFO_override_OCR", "WFO_exact_match","WFO_exact_match_name","WFO_best_match","WFO_candidate_names","WFO_placement"]
84
  self.wfo_headers_no_lists = ["WFO_override_OCR", "WFO_exact_match","WFO_exact_match_name","WFO_best_match","WFO_placement"]
85
 
86
+ self.utility_headers = ["filename"] + self.wfo_headers + self.geo_headers + self.usage_headers + ["run_name", "prompt", "LLM", "tokens_in", "tokens_out", "path_to_crop","path_to_original","path_to_content","path_to_helper",]
87
  # "WFO_override_OCR", "WFO_exact_match","WFO_exact_match_name","WFO_best_match","WFO_candidate_names","WFO_placement",
88
 
89
  # "GEO_override_OCR", "GEO_method", "GEO_formatted_full_string", "GEO_decimal_lat",
 
118
  lgr = logging.getLogger('transformers')
119
  lgr.setLevel(logging.ERROR)
120
 
121
+ self.trOCR_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten") # usually just the "microsoft/trocr-base-handwritten"
122
+ self.trOCR_model = VisionEncoderDecoderModel.from_pretrained(self.trOCR_model_version) # This matches the model
123
 
124
  # Check for GPU availability
125
  self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 
298
  break
299
 
300
 
301
+ def add_data_to_excel_from_response(self, Dirs, path_transcription, response, WFO_record, GEO_record, usage_report, MODEL_NAME_FORMATTED, filename_without_extension, path_to_crop, path_to_content, path_to_helper, nt_in, nt_out):
302
 
303
 
304
  wb = openpyxl.load_workbook(path_transcription)
 
365
  sheet.cell(row=next_row, column=i, value=filename_without_extension)
366
  elif header.value == "prompt":
367
  sheet.cell(row=next_row, column=i, value=os.path.basename(self.path_custom_prompts))
368
+ elif header.value == "run_name":
369
+ sheet.cell(row=next_row, column=i, value=Dirs.run_name)
370
 
371
  # "WFO_exact_match","WFO_exact_match_name","WFO_best_match","WFO_candidate_names","WFO_placement"
372
  elif header.value in self.wfo_headers_no_lists:
 
616
  ##################################################################################################################################
617
  ################################################## OCR ##################################################################
618
  ##################################################################################################################################
619
+ def perform_OCR_and_save_results(self, image_index, json_report, jpg_file_path_OCR_helper, txt_file_path_OCR, txt_file_path_OCR_bounds):
620
  self.logger.info(f'Working on {image_index + 1}/{len(self.img_paths)} --- Starting OCR')
621
  # self.OCR - None
622
 
623
  ### Process_image() runs the OCR for text, handwriting, trOCR AND creates the overlay image
624
+ ocr_google = OCREngine(self.logger, json_report, self.dir_home, self.is_hf, self.path_to_crop, self.cfg, self.trOCR_model_version, self.trOCR_model, self.trOCR_processor, self.device)
625
  ocr_google.process_image(self.do_create_OCR_helper_image, self.logger)
626
  self.OCR = ocr_google.OCR
627
 
 
685
 
686
  filename_without_extension, txt_file_path, txt_file_path_OCR, txt_file_path_OCR_bounds, jpg_file_path_OCR_helper, json_file_path_wiki, txt_file_path_ind_prompt = paths
687
  json_report.set_text(text_main='Starting OCR')
688
+ self.perform_OCR_and_save_results(i, json_report, jpg_file_path_OCR_helper, txt_file_path_OCR, txt_file_path_OCR_bounds)
689
  json_report.set_text(text_main='Finished OCR')
690
 
691
  if not self.OCR:
 
800
  filename_without_extension, txt_file_path, txt_file_path_OCR, txt_file_path_OCR_bounds, jpg_file_path_OCR_helper, json_file_path_wiki, txt_file_path_ind_prompt = paths
801
  # Saving the JSON and XLSX files with the response and updating the final JSON response
802
  if response_candidate is not None:
803
+ final_JSON_response_updated = self.save_json_and_xlsx(self.Dirs, response_candidate, WFO_record, GEO_record, usage_report, MODEL_NAME_FORMATTED, filename_without_extension, path_to_crop, txt_file_path, jpg_file_path_OCR_helper, nt_in, nt_out)
804
  return final_JSON_response_updated, WFO_record, GEO_record
805
  else:
806
+ final_JSON_response_updated = self.save_json_and_xlsx(self.Dirs, response_candidate, WFO_record, GEO_record, usage_report, MODEL_NAME_FORMATTED, filename_without_extension, path_to_crop, txt_file_path, jpg_file_path_OCR_helper, nt_in, nt_out)
807
  return final_JSON_response_updated, WFO_record, GEO_record
808
 
809
 
 
  return filename_without_extension, txt_file_path, txt_file_path_OCR, txt_file_path_OCR_bounds, jpg_file_path_OCR_helper, json_file_path_wiki, txt_file_path_ind_prompt


+ def save_json_and_xlsx(self, Dirs, response, WFO_record, GEO_record, usage_report, MODEL_NAME_FORMATTED, filename_without_extension, path_to_crop, txt_file_path, jpg_file_path_OCR_helper, nt_in, nt_out):
  if response is None:
  response = self.JSON_dict_structure
  # Insert 'filename' as the first key


  # Then add the null info to the spreadsheet
  response_null = self.create_null_row(filename_without_extension, path_to_crop, txt_file_path, jpg_file_path_OCR_helper)
+ self.add_data_to_excel_from_response(Dirs, self.path_transcription, response_null, WFO_record, GEO_record, usage_report, MODEL_NAME_FORMATTED, filename_without_extension, path_to_crop, txt_file_path, jpg_file_path_OCR_helper, nt_in=0, nt_out=0)

  ### Set completed JSON
  else:
  response = self.clean_catalog_number(response, filename_without_extension)
  self.write_json_to_file(txt_file_path, response)
  # add to the xlsx file
+ self.add_data_to_excel_from_response(Dirs, self.path_transcription, response, WFO_record, GEO_record, usage_report, MODEL_NAME_FORMATTED, filename_without_extension, path_to_crop, txt_file_path, jpg_file_path_OCR_helper, nt_in, nt_out)
  return response
 
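save_json_and_xlsx above makes a failed LLM call non-fatal: a None response is replaced by the template JSON structure and a null spreadsheet row is written with zeroed token counts, so every input image keeps exactly one row. A condensed sketch of that control flow (the names below are illustrative stand-ins, not the repo's helpers):

NULL_TEMPLATE = {}  # stand-in for self.JSON_dict_structure

def save_row(response, filename, nt_in, nt_out, make_null_row, write_row):
    if response is None:
        # Failed LLM call: keep one row per image, with token counts zeroed.
        response = dict(NULL_TEMPLATE)
        write_row(make_null_row(filename), nt_in=0, nt_out=0)
    else:
        # Normal path: persist the response and the real token counts.
        write_row(response, nt_in=nt_in, nt_out=nt_out)
    return response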
vouchervision/vouchervision_main.py CHANGED
@@ -3,10 +3,10 @@ VoucherVision - based on LeafMachine2 Processes
  '''
  import os, inspect, sys, shutil
  from time import perf_counter
- currentdir = os.path.dirname(os.path.dirname(inspect.getfile(inspect.currentframe())))
- parentdir = os.path.dirname(currentdir)
- sys.path.append(parentdir)
- sys.path.append(currentdir)
+ # currentdir = os.path.dirname(os.path.dirname(inspect.getfile(inspect.currentframe())))
+ # parentdir = os.path.dirname(currentdir)
+ # sys.path.append(parentdir)
+ # sys.path.append(currentdir)
  from vouchervision.component_detector.component_detector import detect_plant_components, detect_archival_components
  from vouchervision.general_utils import save_token_info_as_csv, print_main_start, check_for_subdirs_VV, load_config_file, load_config_file_testing, report_config, save_config_file, crop_detections_from_images_VV
  from vouchervision.directory_structure_VV import Dir_Structure
@@ -90,7 +90,14 @@ def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progr
  else:
  upload_to_drive(zip_filepath, zip_filename, is_hf, cfg_private=Voucher_Vision.cfg_private, do_upload=False) ##################################### TODO Make this configurable

- return last_JSON_response, final_WFO_record, final_GEO_record, total_cost, Voucher_Vision.n_failed_OCR, Voucher_Vision.n_failed_LLM_calls, zip_filepath
+ return {'last_JSON_response': last_JSON_response,
+         'final_WFO_record': final_WFO_record,
+         'final_GEO_record': final_GEO_record,
+         'total_cost': total_cost,
+         'n_failed_OCR': Voucher_Vision.n_failed_OCR,
+         'n_failed_LLM_calls': Voucher_Vision.n_failed_LLM_calls,
+         'zip_filepath': zip_filepath,
+         }

  def make_zipfile(base_dir, output_filename):
  # Determine the directory where the zip file should be saved
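
Because voucher_vision() now returns a dict instead of a 7-tuple, downstream call sites must switch from positional unpacking to named keys; new fields can then be added without silently shifting positions. A hedged sketch of the caller-side change (the call's argument list is elided, since only its opening parameters are visible in the hunk header above):

# Before (positional unpacking against the removed tuple return):
# last_JSON_response, final_WFO_record, final_GEO_record, total_cost, \
#     n_failed_OCR, n_failed_LLM_calls, zip_filepath = voucher_vision(...)

# After (keyed access against the new dict return):
results = voucher_vision(
    cfg_file_path, dir_home, path_custom_prompts, cfg_test,
    # ... remaining arguments unchanged at the existing call site ...
)
last_JSON_response = results['last_JSON_response']
total_cost = results['total_cost']
zip_filepath = results['zip_filepath']
n_failed = results['n_failed_OCR'] + results['n_failed_LLM_calls']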