phyloforfun committed on
Commit
93fd830
1 Parent(s): 09d598d
app.py CHANGED
@@ -5,8 +5,10 @@ import plotly.graph_objs as go
5
  import numpy as np
6
  from itertools import chain
7
  from PIL import Image
 
8
  import pandas as pd
9
  from typing import Union
 
10
  from streamlit_extras.let_it_rain import rain
11
  from vouchervision.LeafMachine2_Config_Builder import write_config_file
12
  from vouchervision.VoucherVision_Config_Builder import build_VV_config, run_demo_tests_GPT, run_demo_tests_Palm , TestOptionsGPT, TestOptionsPalm, check_if_usable, run_api_tests
@@ -284,151 +286,151 @@ def create_space_saver():
284
  # st.title("VoucherVision")
285
  # create_private_file_0()
286
 
287
- def create_private_file():
288
- st.session_state.proceed_to_main = False
289
- st.title("VoucherVision")
290
- col_private,_= st.columns([12,2])
291
 
292
- if st.session_state.private_file:
293
- cfg_private = get_private_file()
294
- else:
295
- cfg_private = {}
296
- cfg_private['openai'] = {}
297
- cfg_private['openai']['OPENAI_API_KEY'] =''
298
 
299
- cfg_private['openai_azure'] = {}
300
- cfg_private['openai_azure']['openai_api_key'] = ''
301
- cfg_private['openai_azure']['api_version'] = ''
302
- cfg_private['openai_azure']['openai_api_base'] =''
303
- cfg_private['openai_azure']['openai_organization'] =''
304
- cfg_private['openai_azure']['openai_api_type'] =''
305
-
306
- cfg_private['google_cloud'] = {}
307
- cfg_private['google_cloud']['path_json_file'] =''
308
-
309
- cfg_private['google_palm'] = {}
310
- cfg_private['google_palm']['google_palm_api'] =''
311
 
312
 
313
- with col_private:
314
- st.header("Set API keys")
315
- st.info("***Note:*** There is a known bug with tabs in Streamlit. If you update an input field it may take you back to the 'Project Settings' tab. Changes that you made are saved, it's just an annoying glitch. We are aware of this issue and will fix it as soon as we can.")
316
- st.warning("To commit changes to API keys you must press the 'Set API Keys' button at the bottom of the page.")
317
- st.write("Before using VoucherVision you must set your API keys. All keys are stored locally on your computer and are never made public.")
318
- st.write("API keys are stored in `../VoucherVision/PRIVATE_DATA.yaml`.")
319
- st.write("Deleting this file will allow you to reset API keys. Alternatively, you can edit the keys in the user interface.")
320
- st.write("Leave keys blank if you do not intend to use that service.")
321
 
322
- st.write("---")
323
- st.subheader("Google Vision (*Required*)")
324
- st.markdown("VoucherVision currently uses [Google Vision API](https://cloud.google.com/vision/docs/ocr) for OCR. Generating an API key for this is more involved than the others. [Please carefully follow the instructions outlined here to create and setup your account.](https://cloud.google.com/vision/docs/setup) ")
325
- st.markdown("""
326
- Once your account is created, [visit this page](https://console.cloud.google.com) and create a project. Then follow these instructions:
327
-
328
- - **Select your Project**: If you have multiple projects, ensure you select the one where you've enabled the Vision API.
329
- - **Open the Navigation Menu**: Click on the hamburger menu (three horizontal lines) in the top left corner.
330
- - **Go to IAM & Admin**: In the navigation pane, hover over "IAM & Admin" and then click on "Service accounts."
331
- - **Locate Your Service Account**: Find the service account for which you wish to download the JSON key. If you haven't created a service account yet, you'll need to do so by clicking the "CREATE SERVICE ACCOUNT" button at the top.
332
- - **Download the JSON Key**:
333
- - Click on the three dots (actions menu) on the right side of your service account name.
334
- - Select "Manage keys."
335
- - In the pop-up window, click on the "ADD KEY" button and select "JSON."
336
- - The JSON key file will automatically be downloaded to your computer.
337
- - **Store Safely**: This file contains sensitive data that can be used to authenticate and bill your Google Cloud account. Never commit it to public repositories or expose it in any way. Always keep it safe and secure.
338
- """)
339
- with st.container():
340
- c_in_ocr, c_button_ocr = st.columns([10,2])
341
- with c_in_ocr:
342
- google_vision = st.text_input(label = 'Full path to Google Cloud JSON API key file', value = cfg_private['google_cloud'].get('path_json_file', ''),
343
- placeholder = 'e.g. C:/Documents/Secret_Files/google_API/application_default_credentials.json',
344
- help ="This API Key is in the form of a JSON file. Please save the JSON file in a safe directory. DO NOT store the JSON key inside of the VoucherVision directory.",
345
- type='password',key='924857298734590283750932809238')
346
- with c_button_ocr:
347
- st.empty()
348
 
349
 
350
- st.write("---")
351
- st.subheader("OpenAI")
352
- st.markdown("API key for first-party OpenAI API. Create an account with OpenAI [here](https://platform.openai.com/signup), then create an API key [here](https://platform.openai.com/account/api-keys).")
353
- with st.container():
354
- c_in_openai, c_button_openai = st.columns([10,2])
355
- with c_in_openai:
356
- openai_api_key = st.text_input("openai_api_key", cfg_private['openai'].get('OPENAI_API_KEY', ''),
357
- help='The actual API key. Likely to be a string of 2 character, a dash, and then a 48-character string: sk-XXXXXXXX...',
358
- placeholder = 'e.g. sk-XXXXXXXX...',
359
- type='password')
360
- with c_button_openai:
361
- st.empty()
362
-
363
- st.write("---")
364
- st.subheader("OpenAI - Azure")
365
- st.markdown("This version OpenAI relies on Azure servers directly as is intended for private enterprise instances of OpenAI's services, such as [UM-GPT](https://its.umich.edu/computing/ai). Administrators will provide you with the following information.")
366
- azure_openai_api_version = st.text_input("azure_openai_api_version", cfg_private['openai_azure'].get('api_version', ''),
367
- help='API Version e.g. "2023-05-15"',
368
- placeholder = 'e.g. 2023-05-15',
369
- type='password')
370
- azure_openai_api_key = st.text_input("azure_openai_api_key", cfg_private['openai_azure'].get('openai_api_key', ''),
371
- help='The actual API key. Likely to be a 32-character string',
372
- placeholder = 'e.g. 12333333333333333333333333333332',
373
- type='password')
374
- azure_openai_api_base = st.text_input("azure_openai_api_base", cfg_private['openai_azure'].get('openai_api_base', ''),
375
- help='The base url for the API e.g. "https://api.umgpt.umich.edu/azure-openai-api"',
376
- placeholder = 'e.g. https://api.umgpt.umich.edu/azure-openai-api',
377
- type='password')
378
- azure_openai_organization = st.text_input("azure_openai_organization", cfg_private['openai_azure'].get('openai_organization', ''),
379
- help='Your organization code. Likely a short string',
380
- placeholder = 'e.g. 123456',
381
- type='password')
382
- azure_openai_api_type = st.text_input("azure_openai_api_type", cfg_private['openai_azure'].get('openai_api_type', ''),
383
- help='The API type. Typically "azure"',
384
- placeholder = 'e.g. azure',
385
- type='password')
386
- with st.container():
387
- c_in_azure, c_button_azure = st.columns([10,2])
388
- with c_button_azure:
389
- st.empty()
390
 
391
- st.write("---")
392
- st.subheader("Google PaLM 2")
393
- st.markdown('Follow these [instructions](https://developers.generativeai.google/tutorials/setup) to generate an API key for PaLM 2. You may need to also activate an account with [MakerSuite](https://makersuite.google.com/app/apikey) and enable "early access."')
394
- with st.container():
395
- c_in_palm, c_button_palm = st.columns([10,2])
396
- with c_in_palm:
397
- google_palm = st.text_input("Google PaLM 2 API Key", cfg_private['google_palm'].get('google_palm_api', ''),
398
- help='The MakerSuite API key e.g. a 32-character string',
399
- placeholder='e.g. SATgthsykuE64FgrrrrEervr3S4455t_geyDeGq',
400
- type='password')
401
-
402
- with st.container():
403
- with c_button_ocr:
404
- st.write("##")
405
- st.button("Test OCR", on_click=test_API, args=['google_vision',c_in_ocr, cfg_private,openai_api_key,azure_openai_api_version,azure_openai_api_key,
406
- azure_openai_api_base,azure_openai_organization,azure_openai_api_type,google_vision,google_palm])
407
-
408
- with st.container():
409
- with c_button_openai:
410
- st.write("##")
411
- st.button("Test OpenAI", on_click=test_API, args=['openai',c_in_openai, cfg_private,openai_api_key,azure_openai_api_version,azure_openai_api_key,
412
- azure_openai_api_base,azure_openai_organization,azure_openai_api_type,google_vision,google_palm])
413
 
414
- with st.container():
415
- with c_button_azure:
416
- st.write("##")
417
- st.button("Test Azure OpenAI", on_click=test_API, args=['azure_openai',c_in_azure, cfg_private,openai_api_key,azure_openai_api_version,azure_openai_api_key,
418
- azure_openai_api_base,azure_openai_organization,azure_openai_api_type,google_vision,google_palm])
419
 
420
- with st.container():
421
- with c_button_palm:
422
- st.write("##")
423
- st.button("Test PaLM 2", on_click=test_API, args=['palm',c_in_palm, cfg_private,openai_api_key,azure_openai_api_version,azure_openai_api_key,
424
- azure_openai_api_base,azure_openai_organization,azure_openai_api_type,google_vision,google_palm])
425
 
426
 
427
- st.button("Set API Keys",type='primary', on_click=save_changes_to_API_keys, args=[cfg_private,openai_api_key,azure_openai_api_version,azure_openai_api_key,
428
- azure_openai_api_base,azure_openai_organization,azure_openai_api_type,google_vision,google_palm])
429
- if st.button('Proceed to VoucherVision'):
430
- st.session_state.proceed_to_private = False
431
- st.session_state.proceed_to_main = True
432
 
433
  def test_API(api, message_loc, cfg_private,openai_api_key,azure_openai_api_version,azure_openai_api_key, azure_openai_api_base,azure_openai_organization,azure_openai_api_type,google_vision,google_palm):
434
  # Save the API keys
@@ -561,6 +563,15 @@ def check_prompt_yaml_filename(fname):
561
  else:
562
  return False
563
 
 
 
 
 
 
 
 
 
 
564
 
565
  def btn_load_prompt(selected_yaml_file, dir_prompt):
566
  if selected_yaml_file:
@@ -921,7 +932,7 @@ def content_header():
921
 
922
  path_custom_prompts = os.path.join(st.session_state.dir_home,'custom_prompts',st.session_state.config['leafmachine']['project']['prompt_version'])
923
  # Call the machine function.
924
- last_JSON_response, total_cost = voucher_vision(None, st.session_state.dir_home, path_custom_prompts, None, progress_report,path_api_cost=os.path.join(st.session_state.dir_home,'api_cost','api_cost.yaml'))
925
 
926
  if total_cost:
927
  st.success(f":money_with_wings: This run cost :heavy_dollar_sign:{total_cost:.4f}")
@@ -936,10 +947,15 @@ def content_header():
936
  formatted_json = json.dumps(last_JSON_response, indent=4)
937
  st.markdown(f"Last JSON object in the batch:\n```\n{formatted_json}\n```")
938
  st.balloons()
 
 
 
 
939
 
940
  else:
941
  st.button("Start Processing", type='primary', disabled=True)
942
- st.error(":heavy_exclamation_mark: Required API keys not set. Please visit the 'API Keys' tab and set the Google Vision OCR API key and at least one LLM key.")
 
943
 
944
  with col_run_2:
945
  st.subheader('Run Tests', help="")
@@ -1281,7 +1297,8 @@ def main():
1281
  # Main App
1282
  content_header()
1283
 
1284
- tab_settings, tab_prompt, tab_domain, tab_component, tab_processing, tab_private, tab_delete = st.tabs(["Project Settings", "Prompt Builder", "Domain Knowledge","Component Detector", "Processing Options", "API Keys", "Space-Saver"])
 
1285
 
1286
  with tab_settings:
1287
  content_tab_settings()
@@ -1300,10 +1317,10 @@ def main():
1300
  with tab_processing:
1301
  content_tab_processing()
1302
 
1303
- with tab_private:
1304
- if st.button("Edit API Keys"):
1305
- st.session_state.proceed_to_private = True
1306
- st.rerun()
1307
 
1308
  with tab_delete:
1309
  create_space_saver()
@@ -1320,25 +1337,29 @@ if 'proceed_to_main' not in st.session_state:
1320
 
1321
  if 'proceed_to_build_llm_prompt' not in st.session_state:
1322
  st.session_state.proceed_to_build_llm_prompt = False # New state variable to control the flow
1323
- if 'proceed_to_private' not in st.session_state:
1324
- st.session_state.proceed_to_private = False # New state variable to control the flow
1325
 
1326
- if 'private_file' not in st.session_state:
1327
- st.session_state.private_file = does_private_file_exist()
1328
- if st.session_state.private_file:
1329
- st.session_state.proceed_to_main = True
1330
 
1331
  # Initialize session_state variables if they don't exist
1332
  if 'prompt_info' not in st.session_state:
1333
  st.session_state['prompt_info'] = {}
1334
  if 'rules' not in st.session_state:
1335
  st.session_state['rules'] = {}
 
 
 
1336
 
1337
- if not st.session_state.private_file:
1338
- create_private_file()
1339
- elif st.session_state.proceed_to_build_llm_prompt:
 
1340
  build_LLM_prompt_config()
1341
- elif st.session_state.proceed_to_private:
1342
- create_private_file()
1343
  elif st.session_state.proceed_to_main:
1344
  main()
 
5
  import numpy as np
6
  from itertools import chain
7
  from PIL import Image
8
+ from io import BytesIO
9
  import pandas as pd
10
  from typing import Union
11
+ from google.oauth2 import service_account
12
  from streamlit_extras.let_it_rain import rain
13
  from vouchervision.LeafMachine2_Config_Builder import write_config_file
14
  from vouchervision.VoucherVision_Config_Builder import build_VV_config, run_demo_tests_GPT, run_demo_tests_Palm , TestOptionsGPT, TestOptionsPalm, check_if_usable, run_api_tests
 
286
  # st.title("VoucherVision")
287
  # create_private_file_0()
288
 
289
+ # def create_private_file():
290
+ # st.session_state.proceed_to_main = False
291
+ # st.title("VoucherVision")
292
+ # col_private,_= st.columns([12,2])
293
 
294
+ # if st.session_state.private_file:
295
+ # cfg_private = get_private_file()
296
+ # else:
297
+ # cfg_private = {}
298
+ # cfg_private['openai'] = {}
299
+ # cfg_private['openai']['OPENAI_API_KEY'] =''
300
 
301
+ # cfg_private['openai_azure'] = {}
302
+ # cfg_private['openai_azure']['openai_api_key'] = ''
303
+ # cfg_private['openai_azure']['api_version'] = ''
304
+ # cfg_private['openai_azure']['openai_api_base'] =''
305
+ # cfg_private['openai_azure']['openai_organization'] =''
306
+ # cfg_private['openai_azure']['openai_api_type'] =''
307
+
308
+ # cfg_private['google_cloud'] = {}
309
+ # cfg_private['google_cloud']['path_json_file'] =''
310
+
311
+ # cfg_private['google_palm'] = {}
312
+ # cfg_private['google_palm']['google_palm_api'] =''
313
 
314
 
315
+ # with col_private:
316
+ # st.header("Set API keys")
317
+ # st.info("***Note:*** There is a known bug with tabs in Streamlit. If you update an input field it may take you back to the 'Project Settings' tab. Changes that you made are saved, it's just an annoying glitch. We are aware of this issue and will fix it as soon as we can.")
318
+ # st.warning("To commit changes to API keys you must press the 'Set API Keys' button at the bottom of the page.")
319
+ # st.write("Before using VoucherVision you must set your API keys. All keys are stored locally on your computer and are never made public.")
320
+ # st.write("API keys are stored in `../VoucherVision/PRIVATE_DATA.yaml`.")
321
+ # st.write("Deleting this file will allow you to reset API keys. Alternatively, you can edit the keys in the user interface.")
322
+ # st.write("Leave keys blank if you do not intend to use that service.")
323
 
324
+ # st.write("---")
325
+ # st.subheader("Google Vision (*Required*)")
326
+ # st.markdown("VoucherVision currently uses [Google Vision API](https://cloud.google.com/vision/docs/ocr) for OCR. Generating an API key for this is more involved than the others. [Please carefully follow the instructions outlined here to create and setup your account.](https://cloud.google.com/vision/docs/setup) ")
327
+ # st.markdown("""
328
+ # Once your account is created, [visit this page](https://console.cloud.google.com) and create a project. Then follow these instructions:
329
+
330
+ # - **Select your Project**: If you have multiple projects, ensure you select the one where you've enabled the Vision API.
331
+ # - **Open the Navigation Menu**: Click on the hamburger menu (three horizontal lines) in the top left corner.
332
+ # - **Go to IAM & Admin**: In the navigation pane, hover over "IAM & Admin" and then click on "Service accounts."
333
+ # - **Locate Your Service Account**: Find the service account for which you wish to download the JSON key. If you haven't created a service account yet, you'll need to do so by clicking the "CREATE SERVICE ACCOUNT" button at the top.
334
+ # - **Download the JSON Key**:
335
+ # - Click on the three dots (actions menu) on the right side of your service account name.
336
+ # - Select "Manage keys."
337
+ # - In the pop-up window, click on the "ADD KEY" button and select "JSON."
338
+ # - The JSON key file will automatically be downloaded to your computer.
339
+ # - **Store Safely**: This file contains sensitive data that can be used to authenticate and bill your Google Cloud account. Never commit it to public repositories or expose it in any way. Always keep it safe and secure.
340
+ # """)
341
+ # with st.container():
342
+ # c_in_ocr, c_button_ocr = st.columns([10,2])
343
+ # with c_in_ocr:
344
+ # google_vision = st.text_input(label = 'Full path to Google Cloud JSON API key file', value = cfg_private['google_cloud'].get('path_json_file', ''),
345
+ # placeholder = 'e.g. C:/Documents/Secret_Files/google_API/application_default_credentials.json',
346
+ # help ="This API Key is in the form of a JSON file. Please save the JSON file in a safe directory. DO NOT store the JSON key inside of the VoucherVision directory.",
347
+ # type='password',key='924857298734590283750932809238')
348
+ # with c_button_ocr:
349
+ # st.empty()
350
 
351
 
352
+ # st.write("---")
353
+ # st.subheader("OpenAI")
354
+ # st.markdown("API key for first-party OpenAI API. Create an account with OpenAI [here](https://platform.openai.com/signup), then create an API key [here](https://platform.openai.com/account/api-keys).")
355
+ # with st.container():
356
+ # c_in_openai, c_button_openai = st.columns([10,2])
357
+ # with c_in_openai:
358
+ # openai_api_key = st.text_input("openai_api_key", cfg_private['openai'].get('OPENAI_API_KEY', ''),
359
+ # help='The actual API key. Likely to be a string of 2 character, a dash, and then a 48-character string: sk-XXXXXXXX...',
360
+ # placeholder = 'e.g. sk-XXXXXXXX...',
361
+ # type='password')
362
+ # with c_button_openai:
363
+ # st.empty()
364
+
365
+ # st.write("---")
366
+ # st.subheader("OpenAI - Azure")
367
+ # st.markdown("This version OpenAI relies on Azure servers directly as is intended for private enterprise instances of OpenAI's services, such as [UM-GPT](https://its.umich.edu/computing/ai). Administrators will provide you with the following information.")
368
+ # azure_openai_api_version = st.text_input("azure_openai_api_version", cfg_private['openai_azure'].get('api_version', ''),
369
+ # help='API Version e.g. "2023-05-15"',
370
+ # placeholder = 'e.g. 2023-05-15',
371
+ # type='password')
372
+ # azure_openai_api_key = st.text_input("azure_openai_api_key", cfg_private['openai_azure'].get('openai_api_key', ''),
373
+ # help='The actual API key. Likely to be a 32-character string',
374
+ # placeholder = 'e.g. 12333333333333333333333333333332',
375
+ # type='password')
376
+ # azure_openai_api_base = st.text_input("azure_openai_api_base", cfg_private['openai_azure'].get('openai_api_base', ''),
377
+ # help='The base url for the API e.g. "https://api.umgpt.umich.edu/azure-openai-api"',
378
+ # placeholder = 'e.g. https://api.umgpt.umich.edu/azure-openai-api',
379
+ # type='password')
380
+ # azure_openai_organization = st.text_input("azure_openai_organization", cfg_private['openai_azure'].get('openai_organization', ''),
381
+ # help='Your organization code. Likely a short string',
382
+ # placeholder = 'e.g. 123456',
383
+ # type='password')
384
+ # azure_openai_api_type = st.text_input("azure_openai_api_type", cfg_private['openai_azure'].get('openai_api_type', ''),
385
+ # help='The API type. Typically "azure"',
386
+ # placeholder = 'e.g. azure',
387
+ # type='password')
388
+ # with st.container():
389
+ # c_in_azure, c_button_azure = st.columns([10,2])
390
+ # with c_button_azure:
391
+ # st.empty()
392
 
393
+ # st.write("---")
394
+ # st.subheader("Google PaLM 2")
395
+ # st.markdown('Follow these [instructions](https://developers.generativeai.google/tutorials/setup) to generate an API key for PaLM 2. You may need to also activate an account with [MakerSuite](https://makersuite.google.com/app/apikey) and enable "early access."')
396
+ # with st.container():
397
+ # c_in_palm, c_button_palm = st.columns([10,2])
398
+ # with c_in_palm:
399
+ # google_palm = st.text_input("Google PaLM 2 API Key", cfg_private['google_palm'].get('google_palm_api', ''),
400
+ # help='The MakerSuite API key e.g. a 32-character string',
401
+ # placeholder='e.g. SATgthsykuE64FgrrrrEervr3S4455t_geyDeGq',
402
+ # type='password')
403
+
404
+ # with st.container():
405
+ # with c_button_ocr:
406
+ # st.write("##")
407
+ # st.button("Test OCR", on_click=test_API, args=['google_vision',c_in_ocr, cfg_private,openai_api_key,azure_openai_api_version,azure_openai_api_key,
408
+ # azure_openai_api_base,azure_openai_organization,azure_openai_api_type,google_vision,google_palm])
409
+
410
+ # with st.container():
411
+ # with c_button_openai:
412
+ # st.write("##")
413
+ # st.button("Test OpenAI", on_click=test_API, args=['openai',c_in_openai, cfg_private,openai_api_key,azure_openai_api_version,azure_openai_api_key,
414
+ # azure_openai_api_base,azure_openai_organization,azure_openai_api_type,google_vision,google_palm])
415
 
416
+ # with st.container():
417
+ # with c_button_azure:
418
+ # st.write("##")
419
+ # st.button("Test Azure OpenAI", on_click=test_API, args=['azure_openai',c_in_azure, cfg_private,openai_api_key,azure_openai_api_version,azure_openai_api_key,
420
+ # azure_openai_api_base,azure_openai_organization,azure_openai_api_type,google_vision,google_palm])
421
 
422
+ # with st.container():
423
+ # with c_button_palm:
424
+ # st.write("##")
425
+ # st.button("Test PaLM 2", on_click=test_API, args=['palm',c_in_palm, cfg_private,openai_api_key,azure_openai_api_version,azure_openai_api_key,
426
+ # azure_openai_api_base,azure_openai_organization,azure_openai_api_type,google_vision,google_palm])
427
 
428
 
429
+ # st.button("Set API Keys",type='primary', on_click=save_changes_to_API_keys, args=[cfg_private,openai_api_key,azure_openai_api_version,azure_openai_api_key,
430
+ # azure_openai_api_base,azure_openai_organization,azure_openai_api_type,google_vision,google_palm])
431
+ # if st.button('Proceed to VoucherVision'):
432
+ # st.session_state.proceed_to_private = False
433
+ # st.session_state.proceed_to_main = True
434
 
435
  def test_API(api, message_loc, cfg_private,openai_api_key,azure_openai_api_version,azure_openai_api_key, azure_openai_api_base,azure_openai_organization,azure_openai_api_type,google_vision,google_palm):
436
  # Save the API keys
 
563
  else:
564
  return False
565
 
566
def create_download_button(zip_filepath):
    """Render a Streamlit "Download Results" button for an archive on disk.

    Args:
        zip_filepath: Path to the zip file to offer. The file is opened in
            binary mode and read fully into memory.

    The button's suggested file name is the base name of ``zip_filepath`` and
    its MIME type is ``application/zip``.
    """
    # Load the entire archive into an in-memory buffer for the widget.
    with open(zip_filepath, 'rb') as archive:
        payload = BytesIO(archive.read())

    # Offer the archive under its own file name, without the directory part.
    download_name = os.path.basename(zip_filepath)
    st.download_button(
        label="Download Results",
        data=payload,
        file_name=download_name,
        mime='application/zip'
    )
575
 
576
  def btn_load_prompt(selected_yaml_file, dir_prompt):
577
  if selected_yaml_file:
 
932
 
933
  path_custom_prompts = os.path.join(st.session_state.dir_home,'custom_prompts',st.session_state.config['leafmachine']['project']['prompt_version'])
934
  # Call the machine function.
935
+ last_JSON_response, total_cost, st.session_state['zip_filepath'] = voucher_vision(None, st.session_state.dir_home, path_custom_prompts, None, progress_report,path_api_cost=os.path.join(st.session_state.dir_home,'api_cost','api_cost.yaml'))
936
 
937
  if total_cost:
938
  st.success(f":money_with_wings: This run cost :heavy_dollar_sign:{total_cost:.4f}")
 
947
  formatted_json = json.dumps(last_JSON_response, indent=4)
948
  st.markdown(f"Last JSON object in the batch:\n```\n{formatted_json}\n```")
949
  st.balloons()
950
+
951
+ if st.session_state['zip_filepath']:
952
+ create_download_button(st.session_state['zip_filepath'])
953
+
954
 
955
  else:
956
  st.button("Start Processing", type='primary', disabled=True)
957
+ # st.error(":heavy_exclamation_mark: Required API keys not set. Please visit the 'API Keys' tab and set the Google Vision OCR API key and at least one LLM key.")
958
+ st.error(":heavy_exclamation_mark: Required API keys not set. Please set the API keys as 'Secrets' for your Hugging Face Space. Visit the 'Settings' tab at the top of the page.")
959
 
960
  with col_run_2:
961
  st.subheader('Run Tests', help="")
 
1297
  # Main App
1298
  content_header()
1299
 
1300
+ # tab_settings, tab_prompt, tab_domain, tab_component, tab_processing, tab_private, tab_delete = st.tabs(["Project Settings", "Prompt Builder", "Domain Knowledge","Component Detector", "Processing Options", "API Keys", "Space-Saver"])
1301
+ tab_settings, tab_prompt, tab_domain, tab_component, tab_processing, tab_delete = st.tabs(["Project Settings", "Prompt Builder", "Domain Knowledge","Component Detector", "Processing Options", "Space-Saver"])
1302
 
1303
  with tab_settings:
1304
  content_tab_settings()
 
1317
  with tab_processing:
1318
  content_tab_processing()
1319
 
1320
+ # with tab_private:
1321
+ # if st.button("Edit API Keys"):
1322
+ # st.session_state.proceed_to_private = True
1323
+ # st.rerun()
1324
 
1325
  with tab_delete:
1326
  create_space_saver()
 
1337
 
1338
  if 'proceed_to_build_llm_prompt' not in st.session_state:
1339
  st.session_state.proceed_to_build_llm_prompt = False # New state variable to control the flow
1340
+ # if 'proceed_to_private' not in st.session_state:
1341
+ # st.session_state.proceed_to_private = False # New state variable to control the flow
1342
 
1343
+ # if 'private_file' not in st.session_state:
1344
+ # st.session_state.private_file = does_private_file_exist()
1345
+ # if st.session_state.private_file:
1346
+ # st.session_state.proceed_to_main = True
1347
 
1348
  # Initialize session_state variables if they don't exist
1349
  if 'prompt_info' not in st.session_state:
1350
  st.session_state['prompt_info'] = {}
1351
  if 'rules' not in st.session_state:
1352
  st.session_state['rules'] = {}
1353
+ if 'zip_filepath' not in st.session_state:
1354
+ st.session_state['zip_filepath'] = None
1355
+
1356
 
1357
+ # if not st.session_state.private_file:
1358
+ # # create_private_file()
1359
+ # st.header()
1360
+ if st.session_state.proceed_to_build_llm_prompt:
1361
  build_LLM_prompt_config()
1362
+ # elif st.session_state.proceed_to_private:
1363
+ # create_private_file()
1364
  elif st.session_state.proceed_to_main:
1365
  main()
vouchervision/OCR_google_cloud_vision.py CHANGED
@@ -26,8 +26,8 @@ def draw_boxes(image, bounds, color):
26
  )
27
  return image
28
 
29
- def detect_text(path):
30
- client = vision.ImageAnnotatorClient()
31
  with io.open(path, 'rb') as image_file:
32
  content = image_file.read()
33
  image = vision.Image(content=content)
 
26
  )
27
  return image
28
 
29
+ def detect_text(path, client):
30
+ # client = vision.ImageAnnotatorClient()
31
  with io.open(path, 'rb') as image_file:
32
  content = image_file.read()
33
  image = vision.Image(content=content)
vouchervision/VoucherVision_Config_Builder.py CHANGED
@@ -527,50 +527,76 @@ def run_api_tests(api):
527
  print(e)
528
  return False
529
 
530
- def has_API_key(val):
531
- if val != '':
532
- return True
533
- else:
534
- return False
 
 
 
 
535
 
536
- def check_if_usable():
537
- dir_home = os.path.dirname(os.path.dirname(__file__))
538
- path_cfg_private = os.path.join(dir_home, 'PRIVATE_DATA.yaml')
539
- cfg_private = get_cfg_from_full_path(path_cfg_private)
540
 
541
- has_key_openai = has_API_key(cfg_private['openai']['OPENAI_API_KEY'])
542
 
543
- has_key_azure_openai = has_API_key(cfg_private['openai_azure']['api_version'])
544
 
545
- has_key_palm2 = has_API_key(cfg_private['google_palm']['google_palm_api'])
546
 
547
- has_key_google_OCR = has_API_key(cfg_private['google_cloud']['path_json_file'])
548
-
549
- if has_key_google_OCR and (has_key_azure_openai or has_key_openai or has_key_palm2):
550
- return True
551
- else:
552
- return False
553
-
554
- def check_API_key(dir_home, api_version):
555
- dir_home = os.path.dirname(os.path.dirname(__file__))
556
- path_cfg_private = os.path.join(dir_home, 'PRIVATE_DATA.yaml')
557
- cfg_private = get_cfg_from_full_path(path_cfg_private)
558
-
559
- has_key_openai = has_API_key(cfg_private['openai']['OPENAI_API_KEY'])
560
-
561
- has_key_azure_openai = has_API_key(cfg_private['openai_azure']['api_version'])
562
-
563
- has_key_palm2 = has_API_key(cfg_private['google_palm']['google_palm_api'])
564
-
565
- has_key_google_OCR = has_API_key(cfg_private['google_cloud']['path_json_file'])
566
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
567
  if api_version == 'palm' and has_key_palm2:
568
  return True
569
- elif api_version in ['gpt','openai'] and has_key_openai:
570
- return True
571
- elif api_version in ['gpt-azure', 'azure_openai'] and has_key_azure_openai:
572
  return True
573
  elif api_version == 'google-vision-ocr' and has_key_google_OCR:
574
  return True
575
  else:
576
  return False
 
 
527
  print(e)
528
  return False
529
 
530
+ # def has_API_key(val):
531
+ # if val != '':
532
+ # return True
533
+ # else:
534
+ # return False
535
def has_API_key(key_name):
    """Return True when the environment variable ``key_name`` holds a value.

    Args:
        key_name: Name of the environment variable to look up.

    Returns:
        bool: True if the variable is set to a non-empty, non-whitespace
        string; False if it is unset, empty, or whitespace only.
    """
    value = os.getenv(key_name)
    # A variable that exists but is blank carries no usable key, so it is
    # reported as missing rather than present.
    return bool(value and value.strip())
538
+
539
 
540
+ # def check_if_usable():
541
+ # dir_home = os.path.dirname(os.path.dirname(__file__))
542
+ # path_cfg_private = os.path.join(dir_home, 'PRIVATE_DATA.yaml')
543
+ # cfg_private = get_cfg_from_full_path(path_cfg_private)
544
 
545
+ # has_key_openai = has_API_key(cfg_private['openai']['OPENAI_API_KEY'])
546
 
547
+ # has_key_azure_openai = has_API_key(cfg_private['openai_azure']['api_version'])
548
 
549
+ # has_key_palm2 = has_API_key(cfg_private['google_palm']['google_palm_api'])
550
 
551
+ # has_key_google_OCR = has_API_key(cfg_private['google_cloud']['path_json_file'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
552
 
553
+ # if has_key_google_OCR and (has_key_azure_openai or has_key_openai or has_key_palm2):
554
+ # return True
555
+ # else:
556
+ # return False
557
def check_if_usable():
    """Report whether the minimum set of API credentials is configured.

    Reads the ``GOOGLE_APPLICATION_CREDENTIALS``, ``OPENAI_API_KEY`` and
    ``PALM`` environment variables.

    Returns:
        bool: True when ``GOOGLE_APPLICATION_CREDENTIALS`` is set and at least
        one of ``OPENAI_API_KEY`` or ``PALM`` is set. A variable that is empty
        or whitespace only counts as not set.
    """
    def _is_set(env_name):
        # Blank values are treated the same as a missing variable.
        value = os.getenv(env_name)
        return bool(value and value.strip())

    has_key_openai = _is_set('OPENAI_API_KEY')
    has_key_palm2 = _is_set('PALM')
    has_key_google_OCR = _is_set('GOOGLE_APPLICATION_CREDENTIALS')

    return has_key_google_OCR and (has_key_openai or has_key_palm2)
563
+
564
+ # def check_API_key(dir_home, api_version):
565
+ # dir_home = os.path.dirname(os.path.dirname(__file__))
566
+ # path_cfg_private = os.path.join(dir_home, 'PRIVATE_DATA.yaml')
567
+ # cfg_private = get_cfg_from_full_path(path_cfg_private)
568
+
569
+ # has_key_openai = has_API_key(cfg_private['openai']['OPENAI_API_KEY'])
570
+
571
+ # has_key_azure_openai = has_API_key(cfg_private['openai_azure']['api_version'])
572
+
573
+ # has_key_palm2 = has_API_key(cfg_private['google_palm']['google_palm_api'])
574
+
575
+ # has_key_google_OCR = has_API_key(cfg_private['google_cloud']['path_json_file'])
576
+
577
+ # if api_version == 'palm' and has_key_palm2:
578
+ # return True
579
+ # elif api_version in ['gpt','openai'] and has_key_openai:
580
+ # return True
581
+ # elif api_version in ['gpt-azure', 'azure_openai'] and has_key_azure_openai:
582
+ # return True
583
+ # elif api_version == 'google-vision-ocr' and has_key_google_OCR:
584
+ # return True
585
+ # else:
586
+ # return False
587
def check_API_key(api_version):
    """Report whether the credential for the given API is configured.

    Args:
        api_version: API identifier. ``'palm'`` maps to the ``PALM``
            environment variable, ``'gpt'`` and ``'openai'`` map to
            ``OPENAI_API_KEY``, and ``'google-vision-ocr'`` maps to
            ``GOOGLE_APPLICATION_CREDENTIALS``.

    Returns:
        bool: True when the mapped environment variable is set to a non-blank
        value. False for a blank or missing variable and for any identifier
        not listed above (no Azure identifiers are handled here).
    """
    # The API keys are assumed to be set in the environment variables.
    env_name_by_api = {
        'palm': 'PALM',
        'gpt': 'OPENAI_API_KEY',
        'openai': 'OPENAI_API_KEY',
        'google-vision-ocr': 'GOOGLE_APPLICATION_CREDENTIALS',
    }

    # Non-string identifiers can never match; guarding here also keeps
    # unhashable values from raising inside the dict lookup.
    if not isinstance(api_version, str):
        return False

    env_name = env_name_by_api.get(api_version)
    if env_name is None:
        return False

    # Blank values are treated the same as a missing variable.
    value = os.getenv(env_name)
    return bool(value and value.strip())
602
+
vouchervision/general_utils.py CHANGED
@@ -21,6 +21,10 @@ https://helpx.adobe.com/content/dam/help/en/photoshop/pdf/dng_commandline.pdf
21
 
22
  # https://stackoverflow.com/questions/287871/how-do-i-print-colored-text-to-the-terminal
23
 
 
 
 
 
24
  def validate_dir(dir):
25
  if not os.path.exists(dir):
26
  os.makedirs(dir)
 
21
 
22
  # https://stackoverflow.com/questions/287871/how-do-i-print-colored-text-to-the-terminal
23
 
24
def make_zipfile(source_dir, output_filename):
    """Zip the contents of ``source_dir`` and return the archive's file name.

    Args:
        source_dir: Directory whose contents are archived.
        output_filename: Archive path *without* the ``.zip`` extension.

    Returns:
        ``output_filename`` with ``'.zip'`` appended.
    """
    zip_suffix = '.zip'
    shutil.make_archive(base_name=output_filename, format='zip', root_dir=source_dir)
    return output_filename + zip_suffix
27
+
28
  def validate_dir(dir):
29
  if not os.path.exists(dir):
30
  os.makedirs(dir)
vouchervision/utils_VoucherVision.py CHANGED
@@ -4,6 +4,7 @@ import openpyxl
4
  from openpyxl import Workbook, load_workbook
5
  import google.generativeai as palm
6
  from langchain.chat_models import AzureChatOpenAI
 
7
 
8
  currentdir = os.path.dirname(os.path.abspath(
9
  inspect.getfile(inspect.currentframe())))
@@ -54,6 +55,7 @@ class VoucherVision():
54
  self.Dirs = Dirs
55
  self.headers = None
56
  self.prompt_version = None
 
57
 
58
  self.set_API_keys()
59
  self.setup()
@@ -366,51 +368,38 @@ class VoucherVision():
366
  wb.save(path_transcription)
367
 
368
 
369
-
370
-
371
- def has_API_key(self, val):
372
- if val != '':
373
- return True
374
- else:
375
- return False
376
-
377
  def set_API_keys(self):
378
- self.dir_home = os.path.dirname(os.path.dirname(__file__))
379
- self.path_cfg_private = os.path.join(self.dir_home, 'PRIVATE_DATA.yaml')
380
- self.cfg_private = get_cfg_from_full_path(self.path_cfg_private)
 
381
 
382
- self.has_key_openai = self.has_API_key(self.cfg_private['openai']['OPENAI_API_KEY'])
383
-
384
- self.has_key_azure_openai = self.has_API_key(self.cfg_private['openai_azure']['api_version'])
385
-
386
- self.has_key_palm2 = self.has_API_key(self.cfg_private['google_palm']['google_palm_api'])
387
-
388
- self.has_key_google_OCR = self.has_API_key(self.cfg_private['google_cloud']['path_json_file'])
389
-
390
- if self.has_key_openai:
391
- openai.api_key = self.cfg_private['openai']['OPENAI_API_KEY']
392
- os.environ["OPENAI_API_KEY"] = self.cfg_private['openai']['OPENAI_API_KEY']
393
-
394
-
395
- if self.has_key_azure_openai:
396
- # os.environ["OPENAI_API_KEY"] = self.cfg_private['openai_azure']['openai_api_key']
397
- self.llm = AzureChatOpenAI(
398
- deployment_name='gpt-35-turbo',
399
- openai_api_version=self.cfg_private['openai_azure']['api_version'],
400
- openai_api_key=self.cfg_private['openai_azure']['openai_api_key'],
401
- openai_api_base=self.cfg_private['openai_azure']['openai_api_base'],
402
- openai_organization=self.cfg_private['openai_azure']['openai_organization'],
403
- openai_api_type=self.cfg_private['openai_azure']['openai_api_type']
404
- )
405
 
406
  if self.has_key_google_OCR:
407
- os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.cfg_private['google_cloud']['path_json_file']
 
 
 
 
 
 
 
 
 
 
408
 
409
  if self.has_key_palm2:
410
- os.environ['PALM'] = self.cfg_private['google_palm']['google_palm_api']
411
- palm.configure(api_key=os.environ['PALM'])
412
 
 
 
 
413
 
 
414
  def initialize_embeddings(self):
415
  '''Loading embedding search __init__(self, db_name, path_domain_knowledge, logger, build_new_db=False, model_name="hkunlp/instructor-xl", device="cuda")'''
416
  self.Voucher_Vision_Embedding = VoucherVisionEmbedding(self.db_name, self.path_domain_knowledge, logger=self.logger, build_new_db=self.build_new_db)
@@ -567,7 +556,7 @@ class VoucherVision():
567
  # Use Google Vision API to get OCR
568
  # self.OCR = detect_text(path_to_crop)
569
  self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Starting OCR')
570
- self.OCR, self.bounds, self.text_to_box_mapping = detect_text(path_to_crop)
571
  self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Finished OCR')
572
  if len(self.OCR) > 0:
573
  self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Creating OCR Overlay Image')
@@ -632,7 +621,7 @@ class VoucherVision():
632
  filename_without_extension, txt_file_path, txt_file_path_OCR, txt_file_path_OCR_bounds, jpg_file_path_OCR_helper = self.generate_paths(path_to_crop, i)
633
 
634
  # Use Google Vision API to get OCR
635
- self.OCR, self.bounds, self.text_to_box_mapping = detect_text(path_to_crop)
636
  if len(self.OCR) > 0:
637
  self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Starting OCR')
638
  self.OCR = self.OCR.replace("\'", "Minutes").replace('\"', "Seconds")
@@ -755,7 +744,7 @@ class VoucherVision():
755
  def process_specimen_batch_OCR_test(self, path_to_crop):
756
  for img_filename in os.listdir(path_to_crop):
757
  img_path = os.path.join(path_to_crop, img_filename)
758
- self.OCR, self.bounds, self.text_to_box_mapping = detect_text(img_path)
759
 
760
 
761
 
 
4
  from openpyxl import Workbook, load_workbook
5
  import google.generativeai as palm
6
  from langchain.chat_models import AzureChatOpenAI
7
+ from google.oauth2 import service_account
8
 
9
  currentdir = os.path.dirname(os.path.abspath(
10
  inspect.getfile(inspect.currentframe())))
 
55
  self.Dirs = Dirs
56
  self.headers = None
57
  self.prompt_version = None
58
+ self.client = None
59
 
60
  self.set_API_keys()
61
  self.setup()
 
368
  wb.save(path_transcription)
369
 
370
 
 
 
 
 
 
 
 
 
371
  def set_API_keys(self):
372
+ # Access secrets directly from the environment
373
+ openai_api_key = os.getenv('OPENAI_API_KEY')
374
+ google_application_credentials = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
375
+ palm_api_key = os.getenv('PALM')
376
 
377
+ self.has_key_openai = openai_api_key is not None
378
+ self.has_key_google_OCR = google_application_credentials is not None
379
+ self.has_key_palm2 = palm_api_key is not None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
380
 
381
  if self.has_key_google_OCR:
382
+ # Get the credentials JSON from the environment variable
383
+ google_credentials_json = os.getenv('GOOGLE_APPLICATION_CREDENTIALS')
384
+ # Convert the JSON string into a Python dictionary
385
+ google_credentials_dict = json.loads(google_credentials_json)
386
+ # Create a credentials object
387
+ google_credentials = service_account.Credentials.from_service_account_info(google_credentials_dict)
388
+
389
+ # Now, use this `google_credentials` object to authenticate your Google Cloud services
390
+ # For example, if you are using the Google Vision API, it would look like this:
391
+ from google.cloud import vision
392
+ self.client = vision.ImageAnnotatorClient(credentials=google_credentials)
393
 
394
  if self.has_key_palm2:
395
+ os.environ['PALM'] = palm_api_key
396
+ palm.configure(api_key=palm_api_key)
397
 
398
+ if self.has_key_openai:
399
+ openai.api_key = openai_api_key
400
+ os.environ["OPENAI_API_KEY"] = openai_api_key
401
 
402
+
403
  def initialize_embeddings(self):
404
  '''Loading embedding search __init__(self, db_name, path_domain_knowledge, logger, build_new_db=False, model_name="hkunlp/instructor-xl", device="cuda")'''
405
  self.Voucher_Vision_Embedding = VoucherVisionEmbedding(self.db_name, self.path_domain_knowledge, logger=self.logger, build_new_db=self.build_new_db)
 
556
  # Use Google Vision API to get OCR
557
  # self.OCR = detect_text(path_to_crop)
558
  self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Starting OCR')
559
+ self.OCR, self.bounds, self.text_to_box_mapping = detect_text(path_to_crop, self.client)
560
  self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Finished OCR')
561
  if len(self.OCR) > 0:
562
  self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Creating OCR Overlay Image')
 
621
  filename_without_extension, txt_file_path, txt_file_path_OCR, txt_file_path_OCR_bounds, jpg_file_path_OCR_helper = self.generate_paths(path_to_crop, i)
622
 
623
  # Use Google Vision API to get OCR
624
+ self.OCR, self.bounds, self.text_to_box_mapping = detect_text(path_to_crop, self.client)
625
  if len(self.OCR) > 0:
626
  self.logger.info(f'Working on {i+1}/{len(self.img_paths)} --- Starting OCR')
627
  self.OCR = self.OCR.replace("\'", "Minutes").replace('\"', "Seconds")
 
744
  def process_specimen_batch_OCR_test(self, path_to_crop):
745
  for img_filename in os.listdir(path_to_crop):
746
  img_path = os.path.join(path_to_crop, img_filename)
747
+ self.OCR, self.bounds, self.text_to_box_mapping = detect_text(img_path, self.client)
748
 
749
 
750
 
vouchervision/vouchervision_main.py CHANGED
@@ -8,7 +8,7 @@ parentdir = os.path.dirname(currentdir)
8
  sys.path.append(parentdir)
9
  sys.path.append(currentdir)
10
  from vouchervision.component_detector.component_detector import detect_plant_components, detect_archival_components
11
- from general_utils import add_to_expense_report, save_token_info_as_csv, print_main_start, check_for_subdirs_VV, load_config_file, load_config_file_testing, report_config, save_config_file, subset_dir_images, crop_detections_from_images_VV
12
  from directory_structure_VV import Dir_Structure
13
  from data_project import Project_Info
14
  from LM2_logger import start_logging
@@ -88,7 +88,14 @@ def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progr
88
  handler.close()
89
  logger.removeHandler(handler)
90
 
91
- return last_JSON_response, total_cost
 
 
 
 
 
 
 
92
 
93
  def voucher_vision_OCR_test(cfg_file_path, dir_home, cfg_test, path_to_crop):
94
  # get_n_overall = progress_report.get_n_overall()
@@ -143,6 +150,7 @@ def voucher_vision_OCR_test(cfg_file_path, dir_home, cfg_test, path_to_crop):
143
  Voucher_Vision = VoucherVision(cfg, logger, dir_home, None, Project, Dirs)
144
  last_JSON_response = Voucher_Vision.process_specimen_batch_OCR_test(path_to_crop)
145
 
 
146
  if __name__ == '__main__':
147
  is_test = False
148
 
 
8
  sys.path.append(parentdir)
9
  sys.path.append(currentdir)
10
  from vouchervision.component_detector.component_detector import detect_plant_components, detect_archival_components
11
+ from general_utils import make_zipfile, add_to_expense_report, save_token_info_as_csv, print_main_start, check_for_subdirs_VV, load_config_file, load_config_file_testing, report_config, save_config_file, subset_dir_images, crop_detections_from_images_VV
12
  from directory_structure_VV import Dir_Structure
13
  from data_project import Project_Info
14
  from LM2_logger import start_logging
 
88
  handler.close()
89
  logger.removeHandler(handler)
90
 
91
+ # Create Hugging Face zip file
92
+ dir_to_zip = os.path.join(Dirs.dir_home, Dirs.run_name)
93
+ zip_filename = Dirs.run_name
94
+
95
+ # Creating a zip file
96
+ zip_filepath = make_zipfile(dir_to_zip, zip_filename)
97
+
98
+ return last_JSON_response, total_cost, zip_filepath
99
 
100
  def voucher_vision_OCR_test(cfg_file_path, dir_home, cfg_test, path_to_crop):
101
  # get_n_overall = progress_report.get_n_overall()
 
150
  Voucher_Vision = VoucherVision(cfg, logger, dir_home, None, Project, Dirs)
151
  last_JSON_response = Voucher_Vision.process_specimen_batch_OCR_test(path_to_crop)
152
 
153
+
154
  if __name__ == '__main__':
155
  is_test = False
156