Spaces: phyloforfun (Running)
phyloforfun committed
Commit a145e37
1 Parent(s): 94dfdfd
req
- app.py +145 -77
- vouchervision/OCR_Florence_2.py +41 -15
- vouchervision/OCR_GPT4oMini.py +94 -0
- vouchervision/OCR_google_cloud_vision.py +63 -41
- vouchervision/VoucherVision_Config_Builder.py +6 -3
- vouchervision/general_utils.py +23 -6
- vouchervision/model_maps.py +34 -14
- vouchervision/utils_LLM.py +25 -9
- vouchervision/utils_VoucherVision.py +16 -3
- vouchervision/vouchervision_main.py +2 -2
app.py CHANGED
@@ -218,10 +218,10 @@ if 'dir_images_local_TEMP' not in st.session_state:
     st.session_state['dir_images_local_TEMP'] = False
 if 'dir_uploaded_images' not in st.session_state:
     st.session_state['dir_uploaded_images'] = os.path.join(st.session_state.dir_home,'uploads')
-    validate_dir(st.session_state['dir_uploaded_images'])
+    validate_dir(os.path.join(st.session_state.dir_home,'uploads'))
 if 'dir_uploaded_images_small' not in st.session_state:
     st.session_state['dir_uploaded_images_small'] = os.path.join(st.session_state.dir_home,'uploads_small')
-    validate_dir(st.session_state['dir_uploaded_images_small'])
+    validate_dir(os.path.join(st.session_state.dir_home,'uploads_small'))
 
 
 
@@ -264,16 +264,18 @@ def handle_image_upload_and_gallery_hf(uploaded_files):
 
     ind_small = 0
     for uploaded_file in uploaded_files:
+
         if SAFE.check_for_inappropriate_content(uploaded_file):
             clear_image_uploads()
             report_violation(uploaded_file.name, is_hf=st.session_state['is_hf'])
             st.error("Warning: You uploaded an image that violates our terms of service.")
+            return True
 
         # Determine the file type
         if uploaded_file.name.lower().endswith('.pdf'):
            # Handle PDF files
-            file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file
+            file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
            # Convert each page of the PDF to an image
            n_pages = convert_pdf_to_jpg(file_path, st.session_state['dir_uploaded_images'], dpi=200)#st.session_state.config['leafmachine']['project']['dir_images_local'])
            # Update the input list for each page image
@@ -288,27 +290,22 @@ def handle_image_upload_and_gallery_hf(uploaded_files):
                 # Optionally, create a thumbnail for the gallery
                 img = Image.open(jpg_file_path)
                 img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
-
+                try:
                     file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], file_name, img)
-
+                except:
                     file_path_small = save_uploaded_file_local(st.session_state['dir_uploaded_images_small'],st.session_state['dir_uploaded_images_small'], file_name, img)
                 st.session_state['input_list_small'].append(file_path_small)
 
         else:
             ind_small += 1
             # Handle JPG/JPEG files (existing process)
-
-            # file_path = os.path.join(st.session_state['dir_uploaded_images'], uploaded_file.name)
-            image = Image.open(uploaded_file)
-            file_path = os.path.join(st.session_state['dir_uploaded_images'], uploaded_file.name)
-            image.save(file_path, "JPEG")
-
+            file_path = save_uploaded_file(st.session_state['dir_uploaded_images'], uploaded_file)
             st.session_state['input_list'].append(file_path)
-
-
-
-
-
+            if ind_small < MAX_GALLERY_IMAGES +5:
+                img = Image.open(file_path)
+                img.thumbnail((GALLERY_IMAGE_SIZE, GALLERY_IMAGE_SIZE), Image.Resampling.LANCZOS)
+                file_path_small = save_uploaded_file(st.session_state['dir_uploaded_images_small'], uploaded_file, img)
+                st.session_state['input_list_small'].append(file_path_small)
 
         # After processing all files
         st.session_state.config['leafmachine']['project']['dir_images_local'] = st.session_state['dir_uploaded_images']
@@ -396,7 +393,7 @@ def content_input_images(col_left, col_right):
 
     with col_right:
         if st.session_state.is_hf:
-            handle_image_upload_and_gallery_hf(uploaded_files)
+            result = handle_image_upload_and_gallery_hf(uploaded_files)
 
         else:
             st.session_state['view_local_gallery'] = st.toggle("View Image Gallery",)
@@ -1767,12 +1764,47 @@ def content_prompt_and_llm_version():
     st.page_link(os.path.join(os.path.dirname(__file__),"pages","prompt_builder.py"), label="Prompt Builder", icon="🚧")
 
 
-    st.header('LLM Version')
-    col_llm_1, col_llm_2 = st.columns([4,2])
+    # st.header('LLM Version')
+    # col_llm_1, col_llm_2 = st.columns([4,2])
 
+    # with col_llm_1:
+    #     GUI_MODEL_LIST = ModelMaps.get_models_gui_list()
+    #     st.session_state.config['leafmachine']['LLM_version'] = st.selectbox("LLM version", GUI_MODEL_LIST, index=GUI_MODEL_LIST.index(st.session_state.config['leafmachine'].get('LLM_version', ModelMaps.MODELS_GUI_DEFAULT)))
+
+    # Determine the default family based on the default model
+    default_model = ModelMaps.MODELS_GUI_DEFAULT
+    default_family = None
+    for family, models in ModelMaps.MODEL_FAMILY.items():
+        if default_model in models:
+            default_family = family
+            break
+
+    st.header("LLM Version")
+
+    col_llm_1, col_llm_2 = st.columns([4, 2])
     with col_llm_1:
-        GUI_MODEL_LIST = ModelMaps.get_models_gui_list()
-        st.session_state.config['leafmachine']['LLM_version'] = st.selectbox("LLM version", GUI_MODEL_LIST, index=GUI_MODEL_LIST.index(st.session_state.config['leafmachine'].get('LLM_version', ModelMaps.MODELS_GUI_DEFAULT)))
+        # Step 1: Select Model Family with default family pre-selected
+        family_list = list(ModelMaps.MODEL_FAMILY.keys())
+        selected_family = st.selectbox("Select Model Family", family_list, index=family_list.index(default_family) if default_family else 0)
+
+        # Step 2: Display Models based on selected family
+        GUI_MODEL_LIST = ModelMaps.get_models_gui_list_family(selected_family)
+
+        # Ensure the selected model is part of the current family; if not, use the default of this family
+        selected_model_default = st.session_state.config['leafmachine'].get('LLM_version', default_model)
+        if selected_model_default not in GUI_MODEL_LIST:
+            selected_model_default = GUI_MODEL_LIST[0]
+
+        selected_model = st.selectbox("LLM version", GUI_MODEL_LIST, index=GUI_MODEL_LIST.index(selected_model_default))
+
+        # Update the session state with the selected model
+        st.session_state.config['leafmachine']['LLM_version'] = selected_model
+
         st.markdown("""
         Based on preliminary results, the following models perform the best. We are currently running tests of all possible OCR + LLM + Prompt combinations to create recipes for different workflows.
         - Any Mistral model e.g., `Mistral Large`
@@ -1815,25 +1847,43 @@ def content_api_check():
 
 
 
-def adjust_ocr_options_based_on_capability(capability_score):
+def adjust_ocr_options_based_on_capability(capability_score, model_name='llava'):
+    if model_name == 'llava':
+        llava_models_requirements = {
+            "liuhaotian/llava-v1.6-mistral-7b": {"full": 18, "4bit": 9},
+            "liuhaotian/llava-v1.6-34b": {"full": 70, "4bit": 25},
+            "liuhaotian/llava-v1.6-vicuna-13b": {"full": 33, "4bit": 15},
+            "liuhaotian/llava-v1.6-vicuna-7b": {"full": 20, "4bit": 10},
+        }
+        if capability_score == 'no_gpu':
+            return False
+        else:
+            capability_score_n = int(capability_score.split("_")[1].split("GB")[0])
+            supported_models = [model for model, reqs in llava_models_requirements.items()
+                                if reqs["full"] <= capability_score_n or reqs["4bit"] <= capability_score_n]
+
+            # If no models are supported, disable the LLaVA option
+            if not supported_models:
+                # Assuming the LLaVA option is the last in your list
+                return False # Indicate LLaVA is not supported
+            return True # Indicate LLaVA is supported
+    elif model_name == 'florence-2':
+        florence_models_requirements = {
+            "microsoft/Florence-2-large": {"full": 16,},
+            "microsoft/Florence-2-base": {"full": 12,},
+        }
+        if capability_score == 'no_gpu':
+            return False
+        else:
+            capability_score_n = int(capability_score.split("_")[1].split("GB")[0])
+            supported_models = [model for model, reqs in florence_models_requirements.items()
+                                if reqs["full"] <= capability_score_n]
 
+            # If no models are supported, disable the model option
+            if not supported_models:
+                # Assuming the model option is the last in your list
+                return False # Indicate model is not supported
+            return True # Indicate model is supported
 
 
@@ -1867,12 +1917,22 @@ def content_ocr_method():
 
     c1, c2 = st.columns([4,4])
 
-
+    with c2:
+        st.subheader("Local Methods")
+        st.write("Local methods are free, but require a capable GPU. ")
+        # Check if LLaVA models are supported based on capability score
+        llava_supported = adjust_ocr_options_based_on_capability(st.session_state.capability_score, model_name='llava')
+        florence_supported = adjust_ocr_options_based_on_capability(st.session_state.capability_score, model_name='florence-2')
+
+        if llava_supported:
+            st.success("LLaVA models are supported on this computer. A GPU with at least 12 GB of VRAM is available.")
+        else:
+            st.warning("LLaVA models are NOT supported on this computer. Requires a GPU with at least 12 GB of VRAM.")
+
+        if llava_supported:
+            st.success("Florence-2 models are supported on this computer. A GPU with at least 12 GB of VRAM is available.")
+        else:
+            st.warning("Florence-2 models are NOT supported on this computer. Requires a GPU with at least 12 GB of VRAM.")
 
     demo_text_h = f"Google_OCR_Handwriting:\nHERBARIUM OF MARCUS W. LYON , JR . Tracaulon sagittatum Indiana : Porter Co. incal Springs edge wet subdunal woods 1927 TX 11 Ilowers pink UNIVERSITE HERBARIUM MICH University of Michigan Herbarium 1439649 copyright reserved PERSICARIA FEB 2 6 1965 cm "
     demo_text_tr = f"trOCR:\nherbarium of marcus w. lyon jr. : : : tracaulon sagittatum indiana porter co. incal springs TX 11 Ilowers pink 1439649 copyright reserved D H U Q "
@@ -1882,7 +1942,7 @@ def content_ocr_method():
     demo_text_trh = demo_text_h + '\n' + demo_text_tr
     demo_text_trp = demo_text_p + '\n' + demo_text_tr
 
-    options = ["Google Vision Handwritten", "Google Vision Printed", "CRAFT + trOCR","LLaVA",
+    options = ["Google Vision Handwritten", "Google Vision Printed", "Florence-2", "GPT-4o-mini", "CRAFT + trOCR","LLaVA", ]
     options_llava = ["llava-v1.6-mistral-7b", "llava-v1.6-34b", "llava-v1.6-vicuna-13b", "llava-v1.6-vicuna-7b",]
     options_llava_bit = ["full", "4bit",]
     captions_llava = [
@@ -1905,7 +1965,7 @@ def content_ocr_method():
     default_index_llava_bit = 0
     with c1:
         st.subheader("API Methods (Google Vision)")
-        st.write("Using APIs for OCR allows VoucherVision to run on most computers.")
+        st.write("Using APIs for OCR allows VoucherVision to run on most computers. You can use multiple OCR engines simultaneously.")
 
         st.session_state.config['leafmachine']['project']['double_OCR'] = st.checkbox(label="Send 2 copies of the OCR to the LLM",
                                                                                       help="This can help the LLMs focus attention on the OCR and not get lost in the longer instruction text",
@@ -1934,6 +1994,7 @@ def content_ocr_method():
         "CRAFT + trOCR": 'CRAFT',
         "LLaVA": 'LLaVA',
         "Florence-2": 'Florence-2',
+        "GPT-4o-mini": "GPT-4o-mini",
     }
 
     # Map selected options to their corresponding internal representations
@@ -1943,45 +2004,52 @@ def content_ocr_method():
     st.session_state.config['leafmachine']['project']['OCR_option'] = selected_OCR_options
 
 
-    with c2:
-        st.subheader("Local Methods")
-        st.write("Local methods are free, but require a capable GPU. ")
-
+
     if 'CRAFT' in selected_OCR_options:
+        st.subheader('Options for :blue[CRAFT + trOCR]')
+        st.write("Supplement Google Vision OCR with :blue[trOCR] (handwriting OCR) using `microsoft/trocr-base-handwritten`. This option requires Google Vision API and a GPU.")
+        if 'CRAFT' in selected_OCR_options:
+            do_use_trOCR = st.checkbox("Enable :blue[trOCR]", value=True, key="Enable trOCR1",disabled=True)#,disabled=st.session_state['lacks_GPU'])
+        else:
+            do_use_trOCR = st.checkbox("Enable :blue[trOCR]", value=st.session_state.config['leafmachine']['project']['do_use_trOCR'],key="Enable trOCR2")#,disabled=st.session_state['lacks_GPU'])
+        st.session_state.config['leafmachine']['project']['do_use_trOCR'] = do_use_trOCR
+
+        if do_use_trOCR:
+            # st.session_state.config['leafmachine']['project']['trOCR_model_path'] = "microsoft/trocr-large-handwritten"
+            default_trOCR_model_path = st.session_state.config['leafmachine']['project']['trOCR_model_path']
+            user_input_trOCR_model_path = st.text_input(":blue[trOCR] Hugging Face model path. MUST be a fine-tuned version of 'microsoft/trocr-base-handwritten' or 'microsoft/trocr-large-handwritten', or a microsoft :blue[trOCR] model.", value=default_trOCR_model_path)
+            if st.session_state.config['leafmachine']['project']['trOCR_model_path'] != user_input_trOCR_model_path:
+                is_valid_mp = is_valid_huggingface_model_path(user_input_trOCR_model_path)
+                if not is_valid_mp:
+                    st.error(f"The Hugging Face model path {user_input_trOCR_model_path} is not valid. Please revise.")
+                else:
+                    st.session_state.config['leafmachine']['project']['trOCR_model_path'] = user_input_trOCR_model_path
 
 
     if "Florence-2" in selected_OCR_options:
+        st.subheader('Options for :green[Florence-2]')
         default_florence_model_path = st.session_state.config['leafmachine']['project']['florence_model_path']
-        user_input_florence_model_path = st.text_input("Florence-2 Hugging Face model path. MUST be a Florence-2 version based on 'microsoft/Florence-2-large' or similar.", value=default_florence_model_path)
 
+        st.session_state.config['leafmachine']['project']['florence_model_path'] = st.radio(
+            "Select :green[Florence-2] version.",
+            ["microsoft/Florence-2-large", "microsoft/Florence-2-base", ],
+            captions=["'large' requires at least 16GB of VRAM", "'base' requires 12GB of VRAM."])
+
+    if "GPT-4o-mini" in selected_OCR_options:
+        st.subheader('Options for :violet[GPT-4o-mini]')
+        default_resolution = st.session_state.config['leafmachine']['project']['OCR_GPT_4o_mini_resolution']
+
+        st.session_state.config['leafmachine']['project']['OCR_GPT_4o_mini_resolution'] = st.radio(
+            "Select level of detail for :violet[GPT-4o-mini] OCR. We only recommend 'high' detail in most scenarios.",
+            ["high", "low", ],
+            captions=["$0.50 per 1,000", "\$5 - \$10 per 1,000"])
 
 
     if 'LLaVA' in selected_OCR_options:
+        st.subheader('Options for :red[LLaVA]')
         OCR_option_llava = st.radio(
-            "Select the LLaVA version",
+            "Select the :red[LLaVA] version",
             options_llava,
             index=default_index_llava,
             help="",captions=captions_llava,
@@ -1989,12 +2057,13 @@ def content_ocr_method():
         st.session_state.config['leafmachine']['project']['OCR_option_llava'] = OCR_option_llava
 
         OCR_option_llava_bit = st.radio(
-            "Select the LLaVA quantization level",
+            "Select the :red[LLaVA] quantization level",
            options_llava_bit,
            index=default_index_llava_bit,
            help="",captions=captions_llava_bit,
            )
         st.session_state.config['leafmachine']['project']['OCR_option_llava_bit'] = OCR_option_llava_bit
+        st.write('---')
 
 
 
@@ -2045,7 +2114,6 @@ def show_ocr():
     # st.image(st.session_state["demo_overlay"], caption='OCR Overlay Images', output_format = "JPEG")
 
 def content_collage_overlay():
-    st.markdown("---")
     col_collage, col_overlay = st.columns([4,4])
 
 
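
The family-scoped model picker above drives its second selectbox from the family chosen in the first. The sketch below restates the pattern in isolation; the FAMILIES dict and its keys are hypothetical stand-ins for ModelMaps.MODEL_FAMILY, not VoucherVision's actual data. Note also that in the content_ocr_method hunk the Florence-2 status message re-tests llava_supported rather than florence_supported, which looks unintended.

# Minimal sketch of the two-step family -> model picker (Streamlit).
import streamlit as st

FAMILIES = {
    'OpenAI': ["GPT 4", "GPT 4o mini 2024-07-18"],
    'Google': ["Gemini 1.5 Flash"],
}
DEFAULT_MODEL = "Gemini 1.5 Flash"

# Pre-select the family that contains the default model.
default_family = next((f for f, m in FAMILIES.items() if DEFAULT_MODEL in m), None)
family_list = list(FAMILIES.keys())
selected_family = st.selectbox("Select Model Family", family_list,
                               index=family_list.index(default_family) if default_family else 0)

# Offer only that family's models; fall back when the prior pick left the family.
models = FAMILIES[selected_family]
prior = st.session_state.get('LLM_version', DEFAULT_MODEL)
st.session_state['LLM_version'] = st.selectbox(
    "LLM version", models, index=models.index(prior) if prior in models else 0)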
vouchervision/OCR_Florence_2.py CHANGED
@@ -6,12 +6,18 @@ import matplotlib.patches as patches
 from PIL import Image, ImageDraw, ImageFont
 import numpy as np
 import warnings
-from transformers import AutoProcessor, AutoModelForCausalLM, AutoTokenizer
-
+from transformers import AutoProcessor, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+
+try:
+    from vouchervision.utils_LLM import SystemLoadMonitor
+except:
+    from utils_LLM import SystemLoadMonitor
+
 
 warnings.filterwarnings("ignore", category=UserWarning, message="TypedStorage is deprecated")
 
 class FlorenceOCR:
+    # def __init__(self, logger, model_id='microsoft/Florence-2-base'):
     def __init__(self, logger, model_id='microsoft/Florence-2-large'):
         self.MAX_TOKENS = 1024
         self.logger = logger
@@ -25,7 +31,15 @@ class FlorenceOCR:
         # self.model_id_clean = "mistralai/Mistral-7B-v0.3"
         self.model_id_clean = "unsloth/mistral-7b-instruct-v0.3-bnb-4bit"
         self.tokenizer_clean = AutoTokenizer.from_pretrained(self.model_id_clean)
-
+        # Configuring the BitsAndBytesConfig for quantization
+        quant_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            quant_method="bnb",
+        )
+        self.model_clean = AutoModelForCausalLM.from_pretrained(
+            self.model_id_clean,
+            quantization_config=quant_config,
+            low_cpu_mem_usage=True,)
 
 
     def ocr_florence(self, image, task_prompt='<OCR>', text_input=None):
@@ -54,34 +68,46 @@
             num_beams=3,
         )
         generated_text = self.processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
-        parsed_answer_dirty = self.processor.post_process_generation(
+        parsed_answer_dict = self.processor.post_process_generation(
             generated_text,
             task=task_prompt,
             image_size=(image.width, image.height)
         )
 
+        parsed_answer_text = parsed_answer_dict[task_prompt]
+
+        # Prepare input for the second model
+        inputs_clean = self.tokenizer_clean(
+            f"Insert spaces into this text to make all the words valid. This text contains scientific names of plants, locations, habitat, coordinate words: {parsed_answer_text}",
+            return_tensors="pt"
+        )
+        inputs_clean = {key: value.to(self.model_clean.device) for key, value in inputs_clean.items()}
+
+        outputs_clean = self.model_clean.generate(**inputs_clean, max_new_tokens=self.MAX_TOKENS)
+        text_with_spaces = self.tokenizer_clean.decode(outputs_clean[0], skip_special_tokens=True)
+
+        # Extract only the LLM response from the decoded text
+        response_start = text_with_spaces.find(parsed_answer_text)
+        if response_start != -1:
+            text_with_spaces = text_with_spaces[response_start + len(parsed_answer_text):].strip()
 
-        parsed_answer = self.tokenizer_clean.decode(outputs[0], skip_special_tokens=True)
-        print(parsed_answer_dirty)
-        print(parsed_answer)
+        print(text_with_spaces)
 
         self.monitor.stop_inference_timer() # Starts tool timer too
         usage_report = self.monitor.stop_monitoring_report_usage()
 
-        return
+        return text_with_spaces, parsed_answer_text, parsed_answer_dict, usage_report
 
 
 def main():
-    img_path = '/home/brlab/Downloads/gem_2024_06_26__02-26-02/Cropped_Images/By_Class/label/1.jpg'
-
+    # img_path = '/home/brlab/Downloads/gem_2024_06_26__02-26-02/Cropped_Images/By_Class/label/1.jpg'
+    img_path = 'D:/D_Desktop/BR_1839468565_Ochnaceae_Campylospermum_reticulatum_label.jpg'
 
     image = Image.open(img_path)
 
-    ocr = FlorenceOCR(logger = None)
-
+    # ocr = FlorenceOCR(logger = None, model_id='microsoft/Florence-2-base')
+    ocr = FlorenceOCR(logger = None, model_id='microsoft/Florence-2-large')
+    results_text, results_all, results_dirty, usage_report = ocr.ocr_florence(image, task_prompt='<OCR>', text_input=None)
    print(results_text)
 
 if __name__ == '__main__':
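
For context, here is a minimal sketch of the 4-bit load added to __init__ above, assuming transformers with bitsandbytes installed and a CUDA GPU. The bnb_4bit_compute_dtype line is an optional extra that is not in the commit; conversely, the commit's quant_method="bnb" is not a documented BitsAndBytesConfig parameter (recent transformers versions set that field internally), so it is likely ignored.

# Sketch: 4-bit quantized load of the Mistral "cleanup" model.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_id = "unsloth/mistral-7b-instruct-v0.3-bnb-4bit"
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,  # optional; not in the commit
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quant_config,
    low_cpu_mem_usage=True,
)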
vouchervision/OCR_GPT4oMini.py ADDED
@@ -0,0 +1,94 @@
+import os, base64, requests, yaml
+from PIL import Image
+from openai import OpenAI
+
+from general_utils import calculate_cost
+
+# PROMPT = """Please perform OCR on this scientific image and extract the printed and handwritten text verbatim. Do not explain your answer, only return the verbatim text in this JSON dictionary format: {'printed_text': '', 'handwritten_text': ''}"""
+PROMPT = """Please perform OCR on this scientific image and extract all of the words and text verbatim. Do not explain your answer, only return the verbatim text:"""
+
+class GPT4oMiniOCR:
+    def __init__(self, api_key):
+        self.api_key = api_key
+        self.path_api_cost = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'api_cost', 'api_cost.yaml')
+
+
+    def encode_image(self, image_path):
+        with open(image_path, "rb") as image_file:
+            return base64.b64encode(image_file.read()).decode('utf-8')
+
+    def ocr_gpt4o(self, image_path, resolution="low", max_tokens=512):
+        # Getting the base64 string
+        base64_image = self.encode_image(image_path)
+
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {self.api_key}"
+        }
+
+        payload = {
+            "model": "gpt-4o-mini",
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "text",
+                            "text": PROMPT,
+                        },
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/jpeg;base64,{base64_image}",
+                                "detail": resolution,
+                            }
+                        }
+                    ]
+                }
+            ],
+            "max_tokens": max_tokens
+        }
+
+        response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
+        response_json = response.json()
+
+        if "choices" in response_json:
+            parsed_answer = response_json["choices"][0]["message"]["content"]
+        else:
+            parsed_answer = None
+
+        usage_report = response_json.get('usage', {})
+        tokens_in = usage_report["prompt_tokens"]
+        tokens_out = usage_report["completion_tokens"]
+
+        total_cost = calculate_cost('GPT_4o_mini_2024_07_18', self.path_api_cost, tokens_in, tokens_out)
+        cost_in, cost_out, total_cost, rates_in, rates_out = total_cost
+
+        return parsed_answer, cost_in, cost_out, total_cost, rates_in, rates_out, tokens_in, tokens_out
+
+
+
+
+def main():
+    # img_path = '/home/brlab/Downloads/gem_2024_06_26__02-26-02/Cropped_Images/By_Class/label/1.jpg'
+    img_path = 'D:/D_Desktop/BR_1839468565_Ochnaceae_Campylospermum_reticulatum_label.jpg'
+
+    # $env:OPENAI_API_KEY="KEY"
+    API_KEY = "sk-proj-..."
+
+
+    ocr = GPT4oMiniOCR(API_KEY)
+
+    parsed_answer, cost_in, cost_out, total_cost, rates_in, rates_out, tokens_in, tokens_out = ocr.ocr_gpt4o(img_path, resolution="low", max_tokens=512)
+    print(f"Parsed Answer: {parsed_answer}")
+    print(f"Total Cost: {total_cost}")
+
+    parsed_answer, cost_in, cost_out, total_cost, rates_in, rates_out, tokens_in, tokens_out = ocr.ocr_gpt4o(img_path, resolution="high", max_tokens=512)
+    print(f"Parsed Answer: {parsed_answer}")
+    print(f"Total Cost: {total_cost}")
+
+
+
+if __name__ == '__main__':
+    main()
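
The new module assembles the Chat Completions request by hand with requests, although it already imports the openai SDK; a rough equivalent through that SDK is sketched below (the prompt is abbreviated and the image path is hypothetical). The committed main() also hardcoded a live-looking API key, redacted above; reading it from the environment, as the # $env:OPENAI_API_KEY comment suggests, avoids shipping secrets.

# Sketch: the same image-OCR request via the openai SDK the module imports
# (assumes OPENAI_API_KEY is set; "label.jpg" is a hypothetical path).
import base64
from openai import OpenAI

client = OpenAI()
with open("label.jpg", "rb") as f:
    b64 = base64.b64encode(f.read()).decode("utf-8")

resp = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{
        "role": "user",
        "content": [
            {"type": "text", "text": "Extract all text in this image verbatim:"},
            {"type": "image_url",
             "image_url": {"url": f"data:image/jpeg;base64,{b64}", "detail": "low"}},
        ],
    }],
    max_tokens=512,
)
print(resp.choices[0].message.content)
print(resp.usage.prompt_tokens, resp.usage.completion_tokens)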
vouchervision/OCR_google_cloud_vision.py CHANGED
@@ -8,6 +8,7 @@ import colorsys
 from tqdm import tqdm
 from google.oauth2 import service_account
 from OCR_Florence_2 import FlorenceOCR
+from OCR_GPT4oMini import GPT4oMiniOCR
 ### LLaVA should only be installed if the user will actually use it.
 ### It requires the most recent pytorch/Python and can mess with older systems
 
@@ -56,6 +57,11 @@ class OCREngine:
 
         self.OCR_JSON_to_file = {}
 
+        # for paid vLM OCR like GPT-vision
+        self.cost = 0.0
+        self.tokens_in = 0
+        self.tokens_out = 0
+
         self.hand_cleaned_text = None
         self.hand_organized_text = None
         self.hand_bounds = None
@@ -84,6 +90,7 @@ class OCREngine:
         self.trOCR_characters = None
         self.set_client()
         self.init_florence()
+        self.init_gpt_4o_mini()
         self.init_craft()
 
         self.multimodal_prompt = """I need you to transcribe all of the text in this image.
@@ -125,6 +132,10 @@ class OCREngine:
         if 'Florence-2' in self.OCR_option:
             self.Florence = FlorenceOCR(logger=self.logger, model_id=self.cfg['leafmachine']['project']['florence_model_path'])
 
+    def init_gpt_4o_mini(self):
+        if 'GPT-4o-mini' in self.OCR_option:
+            self.GPTmini = GPT4oMiniOCR(api_key = os.getenv('OPENAI_API_KEY'))
+
     def init_llava(self):
         if 'LLaVA' in self.OCR_option:
             from vouchervision.OCR_llava import OCRllava
@@ -701,7 +712,7 @@ class OCREngine:
 
         if 'LLaVA' in self.OCR_option: # This option does not produce an OCR helper image
             if self.json_report:
-                self.json_report.set_text(text_main=f'Working on LLaVA {self.Llava.model_path}
+                self.json_report.set_text(text_main=f'Working on LLaVA {self.Llava.model_path} OCR :construction:')
 
             image, json_output, direct_output, str_output, usage_report = self.Llava.transcribe_image(self.path, self.multimodal_prompt)
             self.logger.info(f"LLaVA Usage Report for Model {self.Llava.model_path}:\n{usage_report}")
@@ -716,7 +727,7 @@ class OCREngine:
 
         if 'Florence-2' in self.OCR_option: # This option does not produce an OCR helper image
             if self.json_report:
-                self.json_report.set_text(text_main=f'Working on Florence-2 [{self.Florence.model_id}]
+                self.json_report.set_text(text_main=f'Working on Florence-2 [{self.Florence.model_id}] OCR :construction:')
 
             self.logger.info(f"Florence-2 Usage Report for Model [{self.Florence.model_id}]")
             results_text, results_text_dirty, results, usage_report = self.Florence.ocr_florence(self.path, task_prompt='<OCR>', text_input=None)
@@ -728,6 +739,21 @@ class OCREngine:
             else:
                 self.OCR = self.OCR + f"\nFlorence-2 OCR:\n{results_text}"
 
+        if 'GPT-4o-mini' in self.OCR_option: # This option does not produce an OCR helper image
+            if self.json_report:
+                self.json_report.set_text(text_main=f'Working on GPT-4o-mini OCR :construction:')
+
+            self.logger.info(f"GPT-4o-mini Usage Report")
+            results_text, cost_in, cost_out, total_cost, rates_in, rates_out, self.tokens_in, self.tokens_out = self.GPTmini.ocr_gpt4o(self.path, resolution=self.cfg['leafmachine']['project']['OCR_GPT_4o_mini_resolution'], max_tokens=512)
+            self.cost += total_cost
+
+            self.OCR_JSON_to_file['OCR_GPT_4o_mini'] = results_text
+
+            if self.double_OCR:
+                self.OCR = self.OCR + f"\nGPT-4o-mini OCR:\n{results_text}" + f"\nGPT-4o-mini OCR:\n{results_text}"
+            else:
+                self.OCR = self.OCR + f"\nGPT-4o-mini OCR:\n{results_text}"
+
         if 'normal' in self.OCR_option or 'hand' in self.OCR_option:
             if 'normal' in self.OCR_option:
                 if self.double_OCR:
@@ -824,48 +850,44 @@ class SafetyCheck():
         else:
             self.client = vision.ImageAnnotatorClient(credentials=self.get_google_credentials())
 
+
     def get_google_credentials(self):
         creds_json_str = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
         credentials = service_account.Credentials.from_service_account_info(json.loads(creds_json_str))
         return credentials
 
     def check_for_inappropriate_content(self, file_stream):
-            print("Found NO violation")
-            return False # The image is considered safe.
-        except:
-            return False # The image is considered safe. TEMPOROARY FIX TODO
+        LEVEL = 2
+        content = file_stream.read()
+        image = vision.Image(content=content)
+        response = self.client.safe_search_detection(image=image)
+        safe = response.safe_search_annotation
+
+        likelihood_name = (
+            "UNKNOWN",
+            "VERY_UNLIKELY",
+            "UNLIKELY",
+            "POSSIBLE",
+            "LIKELY",
+            "VERY_LIKELY",
+        )
+        print("Safe search:")
+
+        print(f"    adult*: {likelihood_name[safe.adult]}")
+        print(f"    medical*: {likelihood_name[safe.medical]}")
+        print(f"    spoofed: {likelihood_name[safe.spoof]}")
+        print(f"    violence*: {likelihood_name[safe.violence]}")
+        print(f"    racy: {likelihood_name[safe.racy]}")
+
+        # Check the levels of adult, violence, racy, etc. content.
+        if (safe.adult > LEVEL or
+            safe.medical > LEVEL or
+            # safe.spoof > LEVEL or
+            safe.violence > LEVEL #or
+            # safe.racy > LEVEL
+        ):
+            print("Found violation")
+            return True # The image violates safe search guidelines.
+
+        print("Found NO violation")
+        return False # The image is considered safe.
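
The new SafeSearch check leans on the ordering of the Vision API's likelihood enum: UNKNOWN(0) < VERY_UNLIKELY(1) < UNLIKELY(2) < POSSIBLE(3) < LIKELY(4) < VERY_LIKELY(5). With LEVEL = 2, anything rated POSSIBLE or stronger for adult, medical, or violence trips the filter (spoof and racy are commented out). A compact restatement of the same logic:

# Sketch of the thresholding above; `safe` is the SafeSearchAnnotation from
# client.safe_search_detection(image=image).safe_search_annotation.
def is_flagged(safe, level: int = 2) -> bool:
    # Enum values compare numerically, so > 2 means POSSIBLE or stronger.
    return any(v > level for v in (safe.adult, safe.medical, safe.violence))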
vouchervision/VoucherVision_Config_Builder.py CHANGED
@@ -42,6 +42,7 @@ def build_VV_config(loaded_cfg=None):
     OCR_option = 'hand'
     OCR_option_llava = 'llava-v1.6-mistral-7b' # "llava-v1.6-mistral-7b", "llava-v1.6-34b", "llava-v1.6-vicuna-13b", "llava-v1.6-vicuna-7b",
     OCR_option_llava_bit = 'full' # full or 4bit
+    OCR_GPT_4o_mini_resolution = 'high'
     double_OCR = False
 
     tool_GEO = True
@@ -73,7 +74,7 @@ def build_VV_config(loaded_cfg=None):
                            prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,num_workers,
                            path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
                            prompt_version, do_create_OCR_helper_image, do_use_trOCR, do_use_florence, trOCR_model_path, florence_model_path, OCR_option, OCR_option_llava,
-                           OCR_option_llava_bit, double_OCR, save_cropped_annotations,
+                           OCR_option_llava_bit, OCR_GPT_4o_mini_resolution, double_OCR, save_cropped_annotations,
                            tool_GEO, tool_WFO, tool_wikipedia,
                            check_for_illegal_filenames, skip_vertical, pdf_conversion_dpi, use_domain_knowledge=False)
     else:
@@ -95,6 +96,7 @@ def build_VV_config(loaded_cfg=None):
         OCR_option = loaded_cfg['leafmachine']['project']['OCR_option']
         OCR_option_llava = loaded_cfg['leafmachine']['project']['OCR_option_llava']
         OCR_option_llava_bit = loaded_cfg['leafmachine']['project']['OCR_option_llava_bit']
+        OCR_GPT_4o_mini_resolution = loaded_cfg['leafmachine']['project']['OCR_GPT_4o_mini_resolution']
         double_OCR = loaded_cfg['leafmachine']['project']['double_OCR']
 
         tool_GEO = loaded_cfg['leafmachine']['project']['tool_GEO']
@@ -122,7 +124,7 @@ def build_VV_config(loaded_cfg=None):
                            prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,num_workers,
                            path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
                            prompt_version, do_create_OCR_helper_image, do_use_trOCR, do_use_florence, trOCR_model_path, florence_model_path, OCR_option, OCR_option_llava,
-                           OCR_option_llava_bit, double_OCR, save_cropped_annotations,
+                           OCR_option_llava_bit, OCR_GPT_4o_mini_resolution, double_OCR, save_cropped_annotations,
                            tool_GEO, tool_WFO, tool_wikipedia,
                            check_for_illegal_filenames, skip_vertical, pdf_conversion_dpi, use_domain_knowledge=False)
 
@@ -131,7 +133,7 @@ def assemble_config(dir_home, run_name, dir_images_local,dir_output,
                     prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,num_workers,
                     path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
                     prompt_version, do_create_OCR_helper_image_user, do_use_trOCR, do_use_florence, trOCR_model_path, florence_model_path, OCR_option, OCR_option_llava,
-                    OCR_option_llava_bit, double_OCR, save_cropped_annotations,
+                    OCR_option_llava_bit, OCR_GPT_4o_mini_resolution, double_OCR, save_cropped_annotations,
                    tool_GEO, tool_WFO, tool_wikipedia,
                    check_for_illegal_filenames, skip_vertical, pdf_conversion_dpi, use_domain_knowledge=False):
 
@@ -183,6 +185,7 @@ def assemble_config(dir_home, run_name, dir_images_local,dir_output,
         'OCR_option': OCR_option,
         'OCR_option_llava': OCR_option_llava,
         'OCR_option_llava_bit': OCR_option_llava_bit,
+        'OCR_GPT_4o_mini_resolution': OCR_GPT_4o_mini_resolution,
         'double_OCR': double_OCR,
         'pdf_conversion_dpi': pdf_conversion_dpi,
         'tool_GEO': tool_GEO,
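
The threading above is mechanical: the new OCR_GPT_4o_mini_resolution value travels from build_VV_config through assemble_config into the saved config dict. Illustratively, the relevant slice of the assembled config now looks like this (values are the defaults set in this commit):

# Sketch: where the new key lands inside the assembled config.
config = {
    'leafmachine': {
        'project': {
            'OCR_option': 'hand',
            'OCR_option_llava': 'llava-v1.6-mistral-7b',
            'OCR_option_llava_bit': 'full',
            'OCR_GPT_4o_mini_resolution': 'high',  # new in this commit
            'double_OCR': False,
        },
    },
}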
vouchervision/general_utils.py CHANGED
@@ -10,7 +10,11 @@ import concurrent.futures
 from time import perf_counter
 import torch
 
-from vouchervision.model_maps import ModelMaps
+try:
+    from vouchervision.model_maps import ModelMaps
+except:
+    from model_maps import ModelMaps
+
 
 '''
 TIFF --> DNG
@@ -65,12 +69,12 @@ def add_to_expense_report(dir_home, data):
 
     # If the file does not exist, write the header first
     if not file_exists:
-        writer.writerow(['run','date','api_version','total_cost', 'n_images', 'tokens_in', 'tokens_out', 'rate_in', 'rate_out', 'cost_in', 'cost_out',])
+        writer.writerow(['run','date','api_version','total_cost', 'n_images', 'tokens_in', 'tokens_out', 'rate_in', 'rate_out', 'cost_in', 'cost_out','ocr_cost','ocr_tokens_in', 'ocr_tokens_out',])
 
     # Write the data row
     writer.writerow(data)
 
-def save_token_info_as_csv(Dirs, LLM_version0, path_api_cost, total_tokens_in, total_tokens_out, n_images, dir_home, logger):
+def save_token_info_as_csv(Dirs, LLM_version0, path_api_cost, total_tokens_in, total_tokens_out, OCR_cost, OCR_tokens_in, OCR_tokens_out, n_images, dir_home, logger):
     if path_api_cost:
         LLM_version = ModelMaps.get_version_mapping_cost(LLM_version0)
 
@@ -78,16 +82,18 @@ def save_token_info_as_csv(Dirs, LLM_version0, path_api_cost, total_tokens_in, t
         csv_file_path = os.path.join(Dirs.path_cost, Dirs.run_name + '.csv')
 
         cost_in, cost_out, total_cost, rate_in, rate_out = calculate_cost(LLM_version, path_api_cost, total_tokens_in, total_tokens_out)
+
+        total_cost += OCR_cost
 
         # The data to be written to the CSV file
-        data = [Dirs.run_name, get_datetime(),LLM_version, total_cost, n_images, total_tokens_in, total_tokens_out, rate_in, rate_out, cost_in, cost_out,]
+        data = [Dirs.run_name, get_datetime(),LLM_version, total_cost, n_images, total_tokens_in, total_tokens_out, rate_in, rate_out, cost_in, cost_out,OCR_cost, OCR_tokens_in, OCR_tokens_out,]
 
         # Open the file in write mode
         with open(csv_file_path, mode='w', newline='') as file:
             writer = csv.writer(file)
 
             # Write the header
-            writer.writerow(['run','date','api_version','total_cost', 'n_images', 'tokens_in', 'tokens_out', 'rate_in', 'rate_out', 'cost_in', 'cost_out',])
+            writer.writerow(['run','date','api_version','total_cost', 'n_images', 'tokens_in', 'tokens_out', 'rate_in', 'rate_out', 'cost_in', 'cost_out','ocr_cost','ocr_tokens_in', 'ocr_tokens_out'])
 
             # Write the data
             writer.writerow(data)
@@ -119,6 +125,11 @@ def summarize_expense_report(path_expense_report):
     cost_in_sum = 0
     cost_out_sum = 0
     n_images_sum = 0
+    # ,'ocr_cost','ocr_tokens_in', 'ocr_tokens_out'
+    ocr_cost_sum = 0
+    ocr_tokens_in_sum = 0
+    ocr_tokens_out_sum = 0
+
     api_version_counts = Counter()
 
     # Try to read the CSV file into a DataFrame
@@ -128,7 +139,7 @@ def summarize_expense_report(path_expense_report):
         # Process each row in the DataFrame
         for index, row in df.iterrows():
             run_count += 1
-            total_cost_sum += row['total_cost']
+            total_cost_sum += row['total_cost'] + row['ocr_cost']
             tokens_in_sum += row['tokens_in']
             tokens_out_sum += row['tokens_out']
             rate_in_sum += row['rate_in']
@@ -136,6 +147,9 @@ def summarize_expense_report(path_expense_report):
             cost_in_sum += row['cost_in']
             cost_out_sum += row['cost_out']
             n_images_sum += row['n_images']
+            ocr_cost_sum += row['ocr_cost']
+            ocr_tokens_in_sum += row['ocr_tokens_in']
+            ocr_tokens_out_sum += row['ocr_tokens_out']
             api_version_counts[row['api_version']] += 1
 
     except FileNotFoundError:
@@ -163,6 +177,9 @@ def summarize_expense_report(path_expense_report):
         'rate_out_sum': rate_out_sum,
         'cost_in_sum': cost_in_sum,
         'cost_out_sum': cost_out_sum,
+        'ocr_cost_sum': ocr_cost_sum,
+        'ocr_tokens_in_sum': ocr_tokens_in_sum,
+        'ocr_tokens_out_sum': ocr_tokens_out_sum,
         'n_images_sum':n_images_sum,
         'api_version_percentages': api_version_percentages,
         'cost_per_image': cost_per_image_dict
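
The expense-report CSV gains three columns. One caveat worth noting: save_token_info_as_csv folds OCR_cost into total_cost before writing the row, while summarize_expense_report adds row['ocr_cost'] to total_cost_sum again, so the OCR cost appears to be counted twice in the summary. An illustrative row under the new schema (values invented):

# Sketch: one row of the expense report after this commit (illustrative values).
row = ['demo_run', '2024-07-20', 'GPT_4o_mini_2024_07_18',
       0.0123,              # total_cost (already includes ocr_cost when written)
       10,                  # n_images
       4200, 900,           # tokens_in, tokens_out
       0.15, 0.60,          # rate_in, rate_out
       0.0006, 0.0005,      # cost_in, cost_out
       0.0020, 8000, 500]   # ocr_cost, ocr_tokens_in, ocr_tokens_out (new)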
vouchervision/model_maps.py CHANGED
@@ -40,23 +40,27 @@ class ModelMaps:
         'phyloforfun/mistral-7b-instruct-v2-bnb-4bit__HLT_MICH_Angiospermae_SLTPvC_v1-0_medium_OCR-C25-L25-E50-R05': '#bababa', # Gray
     }
 
-    MODELS_OPENAI = [
+    MODELS_OPENAI = [
+        "GPT 4o 2024-05-13", #GPT_4o_2024_05_13
+        "GPT 4o mini 2024-07-18",
+        "GPT 4 Turbo 2024-04-09",#GPT_4_TURBO_2024_04_09
+        "GPT 4",
+        "GPT 4 32k",
+        "GPT 4 Turbo 0125-preview",
+        "GPT 4 Turbo 1106-preview",
+        "GPT 3.5 Turbo",
+        "GPT 3.5 Instruct",
+    ]
+
+
+    MODELS_OPENAI_AZURE = [
+        "Azure GPT 4",
         # "Azure GPT 4 32k",
         # "Azure GPT 4 Turbo 0125-preview",
         # "Azure GPT 4 Turbo 1106-preview",
         # "Azure GPT 3.5 Turbo",
         # "Azure GPT 3.5 Instruct",
+    ]
 
     MODELS_GOOGLE = [
         # "PaLM 2 text-bison@001",

@@ -79,7 +83,14 @@ class ModelMaps:
         "LOCAL CPU Mistral 7B Instruct v0.2 GGUF",
         'phyloforfun/mistral-7b-instruct-v2-bnb-4bit__HLT_MICH_Angiospermae_SLTPvC_v1-0_medium_OCR-C25-L25-E50-R05']
 
-    MODELS_GUI_DEFAULT = "Azure GPT 4" # "GPT 4 Turbo 1106-preview"
+    MODELS_GUI_DEFAULT = "Gemini 1.5 Flash" #"Azure GPT 4" # "GPT 4 Turbo 1106-preview"
+
+    MODEL_FAMILY = {
+        'OpenAI': MODELS_OPENAI,
+        'OpenAI Azure': MODELS_OPENAI_AZURE,
+        'Google': MODELS_GOOGLE,
+        'Mistral': MODELS_MISTRAL,
+        'Local': MODELS_LOCAL}
 
     version_mapping_cost = {
         'GPT 4 32k': 'GPT_4_32K',

@@ -316,7 +327,16 @@ class ModelMaps:
 
     @classmethod
     def get_models_gui_list(cls):
-        return cls.MODELS_LOCAL + cls.MODELS_GOOGLE + cls.MODELS_OPENAI + cls.MODELS_MISTRAL
+        return cls.MODELS_LOCAL + cls.MODELS_GOOGLE + cls.MODELS_OPENAI + cls.MODELS_OPENAI_AZURE + cls.MODELS_MISTRAL
+
+    @classmethod
+    def get_models_gui_list_family(cls, family=None):
+        if family and family in cls.MODEL_FAMILY:
+            return cls.MODEL_FAMILY[family]
+        all_models = []
+        for family_models in cls.MODEL_FAMILY.values():
+            all_models.extend(family_models)
+        return all_models
 
     @classmethod
     def get_version_mapping_cost(cls, key):
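The new MODEL_FAMILY mapping lets the GUI filter models by provider instead of always flattening every list. A short usage sketch (the Streamlit widgets are illustrative; only ModelMaps and its two accessors are defined by this commit):

    import streamlit as st
    from vouchervision.model_maps import ModelMaps

    # Pick a provider family first, then only show that family's models.
    family = st.selectbox("Model family", list(ModelMaps.MODEL_FAMILY.keys()))
    model = st.selectbox("Model", ModelMaps.get_models_gui_list_family(family))

    # With no argument (or an unknown family) the accessor falls back to
    # every family's models, the same set get_models_gui_list() returns.
    all_models = ModelMaps.get_models_gui_list_family()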
vouchervision/utils_LLM.py
CHANGED
@@ -8,11 +8,16 @@ import psutil
 import threading
 import torch
 from datetime import datetime
-from vouchervision.tool_taxonomy_WFO import validate_taxonomy_WFO, WFONameMatcher
-from vouchervision.tool_geolocate_HERE import validate_coordinates_here
-from vouchervision.tool_wikipedia import validate_wikipedia
 from concurrent.futures import ThreadPoolExecutor, as_completed
 
+try:
+    from vouchervision.tool_taxonomy_WFO import validate_taxonomy_WFO, WFONameMatcher
+    from vouchervision.tool_geolocate_HERE import validate_coordinates_here
+    from vouchervision.tool_wikipedia import validate_wikipedia
+except:
+    from tool_taxonomy_WFO import validate_taxonomy_WFO, WFONameMatcher
+    from tool_geolocate_HERE import validate_coordinates_here
+    from tool_wikipedia import validate_wikipedia
 
 def run_tools(output, tool_WFO, tool_GEO, tool_wikipedia, json_file_path_wiki):
     # Define a function that will catch and return the results of your functions

@@ -179,15 +184,26 @@ class SystemLoadMonitor():
 
         }
 
-        self.logger.info(f"Inference Time: {round(self.inference_time,2)} seconds")
-        self.logger.info(f"Tool Time: {round(tool_time,2)} seconds")
-        self.logger.info(f"Max CPU Usage: {round(self.gpu_usage['max_cpu_usage'],2)}%")
-        self.logger.info(f"Max RAM Usage: {round(self.gpu_usage['max_ram_usage'],2)}GB")
+        if self.logger:
+            self.logger.info(f"Inference Time: {round(self.inference_time,2)} seconds")
+            self.logger.info(f"Tool Time: {round(tool_time,2)} seconds")
+            self.logger.info(f"Max CPU Usage: {round(self.gpu_usage['max_cpu_usage'],2)}%")
+            self.logger.info(f"Max RAM Usage: {round(self.gpu_usage['max_ram_usage'],2)}GB")
+        else:
+            print(f"Inference Time: {round(self.inference_time,2)} seconds")
+            print(f"Tool Time: {round(tool_time,2)} seconds")
+            print(f"Max CPU Usage: {round(self.gpu_usage['max_cpu_usage'],2)}%")
+            print(f"Max RAM Usage: {round(self.gpu_usage['max_ram_usage'],2)}GB")
+
         if self.has_GPU:
             report.update({'max_gpu_load': str(round(self.gpu_usage['max_load'] * 100, 2))})
             report.update({'max_gpu_vram_gb': str(round(self.gpu_usage['max_vram_usage'], 2))})
-            self.logger.info(f"Max GPU Load: {round(self.gpu_usage['max_load'] * 100, 2)}%")
-            self.logger.info(f"Max GPU Memory Usage: {round(self.gpu_usage['max_vram_usage'], 2)}GB")
+            if self.logger:
+                self.logger.info(f"Max GPU Load: {round(self.gpu_usage['max_load'] * 100, 2)}%")
+                self.logger.info(f"Max GPU Memory Usage: {round(self.gpu_usage['max_vram_usage'], 2)}GB")
+            else:
+                print(f"Max GPU Load: {round(self.gpu_usage['max_load'] * 100, 2)}%")
+                print(f"Max GPU Memory Usage: {round(self.gpu_usage['max_vram_usage'], 2)}GB")
         else:
             report.update({'max_gpu_load': '0'})
             report.update({'max_gpu_vram_gb': '0'})
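Two patterns here are worth noting. The try/except import keeps the module usable both as part of the installed vouchervision package and when run directly from inside the package directory; the bare except works, but catching ImportError is the narrower idiom, e.g.:

    # Sketch of the dual-import fallback used above, scoped to ImportError.
    try:
        from vouchervision.tool_wikipedia import validate_wikipedia
    except ImportError:
        from tool_wikipedia import validate_wikipedia

The logger guard likewise lets SystemLoadMonitor run without a configured logger by falling back to print().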
vouchervision/utils_VoucherVision.py
CHANGED
@@ -43,6 +43,10 @@ class VoucherVision():
         self.prompt_version = None
         self.is_hf = is_hf
 
+        self.OCR_cost = 0.0
+        self.OCR_tokens_in = 0
+        self.OCR_tokens_out = 0
+
         ### config_vals_for_permutation allows you to set the starting temp, top_k, top_p, seed....
         self.config_vals_for_permutation = config_vals_for_permutation
 
@@ -649,11 +653,19 @@ class VoucherVision():
     def perform_OCR_and_save_results(self, image_index, json_report, jpg_file_path_OCR_helper, txt_file_path_OCR, txt_file_path_OCR_bounds):
         self.logger.info(f'Working on {image_index + 1}/{len(self.img_paths)} --- Starting OCR')
         # self.OCR - None
+        self.OCR_cost = 0.0
+        self.OCR_tokens_in = 0
+        self.OCR_tokens_out = 0
 
         ### Process_image() runs the OCR for text, handwriting, trOCR AND creates the overlay image
         ocr_google = OCREngine(self.logger, json_report, self.dir_home, self.is_hf, self.path_to_crop, self.cfg, self.trOCR_model_version, self.trOCR_model, self.trOCR_processor, self.device)
         ocr_google.process_image(self.do_create_OCR_helper_image, self.logger)
         self.OCR = ocr_google.OCR
+
+        self.OCR_cost = ocr_google.cost
+        self.OCR_tokens_in = ocr_google.tokens_in
+        self.OCR_tokens_out = ocr_google.tokens_out
+
         self.logger.info(f"Complete OCR text for LLM prompt:\n\n{self.OCR}\n\n")
 
         self.write_json_to_file(txt_file_path_OCR, ocr_google.OCR_JSON_to_file)
 
@@ -774,7 +786,8 @@ class VoucherVision():
 
         self.update_progress_report_final(progress_report)
         final_JSON_response = self.parse_final_json_response(final_JSON_response)
-        return final_JSON_response, final_WFO_record, final_GEO_record, self.total_tokens_in, self.total_tokens_out
+
+        return final_JSON_response, final_WFO_record, final_GEO_record, self.total_tokens_in, self.total_tokens_out, self.OCR_cost, self.OCR_tokens_in, self.OCR_tokens_out
 
 
 ##################################################################################################################################
 
@@ -905,9 +918,9 @@ class VoucherVision():
             if is_real_run:
                 progress_report.update_overall(f"Transcribing Labels")
 
-                final_json_response, final_WFO_record, final_GEO_record, total_tokens_in, total_tokens_out = self.send_to_LLM(self.is_azure, progress_report, json_report, self.model_name)
+                final_json_response, final_WFO_record, final_GEO_record, total_tokens_in, total_tokens_out, OCR_cost, OCR_tokens_in, OCR_tokens_out = self.send_to_LLM(self.is_azure, progress_report, json_report, self.model_name)
 
-                return final_json_response, final_WFO_record, final_GEO_record, total_tokens_in, total_tokens_out
+                return final_json_response, final_WFO_record, final_GEO_record, total_tokens_in, total_tokens_out, OCR_cost, OCR_tokens_in, OCR_tokens_out
 
         except Exception as e:
             self.logger.error(f"LLM call failed in process_specimen_batch: {e}")
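The per-image reset followed by the copy from ocr_google assumes every OCREngine code path sets cost, tokens_in, and tokens_out after process_image(). A defensive variant, if an engine might skip the accounting attributes (the getattr defaults are an assumption, not part of this commit):

    # Fall back to zero-cost accounting if an OCR engine variant does not
    # populate cost/tokens_in/tokens_out after process_image().
    self.OCR_cost = getattr(ocr_google, 'cost', 0.0)
    self.OCR_tokens_in = getattr(ocr_google, 'tokens_in', 0)
    self.OCR_tokens_out = getattr(ocr_google, 'tokens_out', 0)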
vouchervision/vouchervision_main.py
CHANGED
@@ -65,9 +65,9 @@ def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progr
     # Process labels
     Voucher_Vision = VoucherVision(cfg, logger, dir_home, path_custom_prompts, Project, Dirs, is_hf)
     n_images = len(Voucher_Vision.img_paths)
-    last_JSON_response, final_WFO_record, final_GEO_record, total_tokens_in, total_tokens_out = Voucher_Vision.process_specimen_batch(progress_report, json_report, is_real_run)
+    last_JSON_response, final_WFO_record, final_GEO_record, total_tokens_in, total_tokens_out, OCR_cost, OCR_tokens_in, OCR_tokens_out = Voucher_Vision.process_specimen_batch(progress_report, json_report, is_real_run)
 
-    total_cost = save_token_info_as_csv(Dirs, cfg['leafmachine']['LLM_version'], path_api_cost, total_tokens_in, total_tokens_out, n_images, dir_home, logger)
+    total_cost = save_token_info_as_csv(Dirs, cfg['leafmachine']['LLM_version'], path_api_cost, total_tokens_in, total_tokens_out, OCR_cost, OCR_tokens_in, OCR_tokens_out, n_images, dir_home, logger)
 
     t_overall_s = perf_counter()
     logger.name = 'Run Complete! :)'
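Note that process_specimen_batch() now returns eight values instead of five, and save_token_info_as_csv() gains three positional parameters between total_tokens_out and n_images, so every caller must be updated in lockstep with this commit. A multi-line unpack keeps the widened tuple readable at other call sites (sketch; names follow the diff):

    (last_JSON_response, final_WFO_record, final_GEO_record,
     total_tokens_in, total_tokens_out,
     OCR_cost, OCR_tokens_in, OCR_tokens_out) = Voucher_Vision.process_specimen_batch(
        progress_report, json_report, is_real_run)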