Spaces: phyloforfun (Running)

Commit cc75837 • committed by phyloforfun • Parent: bf58fe8
file upload gallery

app.py CHANGED
@@ -681,6 +681,9 @@ def save_changes_to_API_keys(cfg_private,openai_api_key,azure_openai_api_version
 def load_prompt_yaml(filename):
     with open(filename, 'r') as file:
         st.session_state['prompt_info'] = yaml.safe_load(file)
+        st.session_state['prompt_author'] = st.session_state['prompt_info'].get('prompt_author', st.session_state['default_prompt_author'])
+        st.session_state['prompt_author_institution'] = st.session_state['prompt_info'].get('prompt_author_institution', st.session_state['default_prompt_author_institution'])
+        st.session_state['prompt_description'] = st.session_state['prompt_info'].get('prompt_description', st.session_state['default_prompt_description'])
         st.session_state['instructions'] = st.session_state['prompt_info'].get('instructions', st.session_state['default_instructions'])
         st.session_state['json_formatting_instructions'] = st.session_state['prompt_info'].get('json_formatting_instructions', st.session_state['default_json_formatting_instructions'] )
         st.session_state['rules'] = st.session_state['prompt_info'].get('rules', {})

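The three new .get() calls fall back to the default author metadata, so prompt YAML files written before this commit still load without error. A minimal sketch of a prompt file that exercises the new keys; the values are invented for illustration and are not from this repository:

```python
import yaml

# Hypothetical prompt file: the three new metadata keys plus the existing ones.
example_prompt = """
prompt_author: Jane Doe
prompt_author_institution: Example Herbarium
prompt_description: Tuned for vascular plant specimen labels
LLM: gpt
instructions: Transcribe the label text into the JSON structure below.
json_formatting_instructions: Return a single valid JSON object.
rules: {}
mapping: {}
"""

prompt_info = yaml.safe_load(example_prompt)
# Older files simply omit the new keys, so the same .get(key, default)
# pattern used in load_prompt_yaml falls back to the defaults.
print(prompt_info.get('prompt_author', 'unknown'))
```
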
@@ -692,11 +695,14 @@ def load_prompt_yaml(filename):
 
 def save_prompt_yaml(filename, col_right_save):
     yaml_content = {
+        'prompt_author': st.session_state['prompt_author'],
+        'prompt_author_institution': st.session_state['prompt_author_institution'],
+        'prompt_description': st.session_state['prompt_description'],
+        'LLM': st.session_state['LLM'],
         'instructions': st.session_state['instructions'],
         'json_formatting_instructions': st.session_state['json_formatting_instructions'],
         'rules': st.session_state['rules'],
         'mapping': st.session_state['mapping'],
-        'LLM': st.session_state['LLM']
     }
 
     dir_prompt = os.path.join(st.session_state.dir_home, 'custom_prompts')

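Only the reordered yaml_content dict is visible in this hunk; the actual write happens later in save_prompt_yaml and is outside the diff. A self-contained sketch of persisting such a dict, assuming a plain yaml.safe_dump into the custom_prompts folder:

```python
import os
import yaml

# Stand-in values; in app.py these come from st.session_state.
yaml_content = {
    'prompt_author': 'unknown',
    'prompt_author_institution': 'unknown',
    'prompt_description': 'unknown',
    'LLM': 'gpt',
    'instructions': '...',
    'json_formatting_instructions': '...',
    'rules': {},
    'mapping': {},
}

dir_prompt = os.path.join(os.getcwd(), 'custom_prompts')  # stand-in for dir_home
os.makedirs(dir_prompt, exist_ok=True)
with open(os.path.join(dir_prompt, 'my_prompt.yaml'), 'w') as f:
    # sort_keys=False keeps the author metadata at the top, matching the new
    # dict order; whether app.py passes this flag is an assumption.
    yaml.safe_dump(yaml_content, f, sort_keys=False)
```
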
@@ -716,28 +722,30 @@ def save_prompt_yaml(filename, col_right_save):
 def upload_to_drive(filepath, filename):
     # Parse the service account info from the environment variable
     creds_info = json.loads(os.environ.get('GDRIVE_API'))
+    if creds_info:
+        creds = service_account.Credentials.from_service_account_info(
+            creds_info, scopes=["https://www.googleapis.com/auth/drive"]
+        )
+        service = build('drive', 'v3', credentials=creds)
 
+        # Get the folder ID from the environment variable
+        folder_id = os.environ.get('GDRIVE')
+        # st.info(f"{folder_id}")
 
+        if folder_id:
+            file_metadata = {
+                'name': filename,
+                'parents': [folder_id]
+            }
+            # st.info(f"{file_metadata}")
 
+            media = MediaFileUpload(filepath, mimetype='application/x-yaml')
 
+            service.files().create(
+                body=file_metadata,
+                media_body=media,
+                fields='id'
+            ).execute()
 
 def check_unique_mapping_assignments():
     if len(st.session_state['assigned_columns']) != len(set(st.session_state['assigned_columns'])):

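The rewritten upload_to_drive guards on both the parsed credentials and the folder ID: if GDRIVE is unset the function quietly does nothing, and if GDRIVE_API is unset json.loads raises a TypeError because os.environ.get returns None. Below is a sketch of the imports the new body relies on (from google-auth and google-api-python-client) and a guarded call site; where these imports actually live in app.py is an assumption:

```python
import json
import os

# Required by the new upload_to_drive body; assumed to be imported near the
# top of app.py.
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload

# Hypothetical call site for the function defined in the diff above.
# Check the environment first: json.loads(None) raises TypeError when
# GDRIVE_API is not set at all.
if os.environ.get('GDRIVE_API') and os.environ.get('GDRIVE'):
    upload_to_drive('custom_prompts/my_prompt.yaml', 'my_prompt.yaml')
```
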
@@ -776,6 +784,9 @@ def btn_load_prompt(selected_yaml_file, dir_prompt):
     elif not selected_yaml_file:
         # Directly assigning default values since no file is selected
         st.session_state['prompt_info'] = {}
+        st.session_state['prompt_author'] = st.session_state['default_prompt_author']
+        st.session_state['prompt_author_institution'] = st.session_state['default_prompt_author_institution']
+        st.session_state['prompt_description'] = st.session_state['default_prompt_description']
         st.session_state['instructions'] = st.session_state['default_instructions']
         st.session_state['json_formatting_instructions'] = st.session_state['default_json_formatting_instructions']
         st.session_state['rules'] = {}

@@ -784,6 +795,9 @@ def btn_load_prompt(selected_yaml_file, dir_prompt):
         st.session_state['assigned_columns'] = []
 
         st.session_state['prompt_info'] = {
+            'prompt_author': st.session_state['prompt_author'],
+            'prompt_author_institution': st.session_state['prompt_author_institution'],
+            'prompt_description': st.session_state['prompt_description'],
             'instructions': st.session_state['instructions'],
             'json_formatting_instructions': st.session_state['json_formatting_instructions'],
             'rules': st.session_state['rules'],

@@ -791,6 +805,8 @@ def btn_load_prompt(selected_yaml_file, dir_prompt):
             'LLM': st.session_state['LLM']
         }
 
+
+
 def upload_local_prompt_to_server(dir_prompt):
     uploaded_file = st.file_uploader("Upload a custom prompt file", type=['yaml'])
     if uploaded_file is not None:

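The body of upload_local_prompt_to_server continues past this hunk. For context, a minimal sketch of how the st.file_uploader result is typically handled (writing the uploaded YAML into dir_prompt); this is an illustration, not the code from app.py:

```python
import os
import streamlit as st

def upload_local_prompt_to_server_sketch(dir_prompt):
    # st.file_uploader returns an UploadedFile (or None); getvalue() yields bytes.
    uploaded_file = st.file_uploader("Upload a custom prompt file", type=['yaml'])
    if uploaded_file is not None:
        destination = os.path.join(dir_prompt, uploaded_file.name)
        with open(destination, 'wb') as f:
            f.write(uploaded_file.getvalue())
        st.success(f"Saved {uploaded_file.name} to {dir_prompt}")
```
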
@@ -818,6 +834,9 @@ def create_download_button(file_path, selected_yaml_file):
 
 def build_LLM_prompt_config():
     st.session_state['assigned_columns'] = []
+    st.session_state['default_prompt_author'] = 'unknown'
+    st.session_state['default_prompt_author_institution'] = 'unknown'
+    st.session_state['default_prompt_description'] = 'unknown'
     st.session_state['default_instructions'] = """1. Refactor the unstructured OCR text into a dictionary based on the JSON structure outlined below.
 2. You should map the unstructured OCR text to the appropriate JSON key and then populate the field based on its rules.
 3. Some JSON key fields are permitted to remain empty if the corresponding information is not found in the unstructured OCR text.

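For a concrete sense of what default instructions 1-3 ask the LLM to do, here is a toy refactor of OCR text into a JSON-style dictionary; the label text and field names are invented and do not reflect the project's actual schema:

```python
# Invented example, purely to illustrate instructions 1-3: map unstructured
# OCR text onto JSON keys and leave a field empty when the label lacks it.
unstructured_ocr = "Quercus alba L. Collected 12 May 1987, Washtenaw Co., Michigan"

structured = {
    "scientificName": "Quercus alba",  # mapped directly from the OCR text
    "collectionDate": "1987-05-12",    # normalized according to a field rule
    "collector": "",                   # absent from the OCR text, left empty
}
```
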
@@ -885,15 +904,30 @@ The desired null value is also given. Populate the field with the null value of
     # Create the download button
     st.write('##')
     create_download_button(download_file_path, st.session_state['selected_yaml_file'] )
+
+
+    # Prompt Author Information
+    st.header("Prompt Author Information")
+    st.write("We value community contributions! Please provide your name(s) (or pseudonym if you prefer) for credit. If you leave this field blank, it will say 'unknown'.")
+    st.session_state['prompt_author'] = st.text_input("Enter names of prompt author(s)", value=st.session_state['default_prompt_author'])
 
+    st.write("Please provide your institution name. If you leave this field blank, it will say 'unknown'.")
+    st.session_state['prompt_author_institution'] = st.text_input("Enter name of institution", value=st.session_state['default_prompt_author_institution'])
 
+    st.write("Please provide a description of your prompt and its intended task. Is it designed for a specific collection? Taxa? Database structure?")
+    st.session_state['prompt_description'] = st.text_input("Enter description of prompt", value=st.session_state['default_prompt_description'])
+
+
+    st.write('---')
+    st.header("Set LLM Model Type")
     # Define the options for the dropdown
     llm_options = ['gpt', 'palm']
     # Create the dropdown and set the value to session_state['LLM']
+    st.write("Which LLM is this prompt designed for? This will not restrict its use to a specific LLM, but some prompts will behave in different ways across models.")
+    st.write("For example, VoucherVision will automatically add multiple JSON formatting blocks to all PaLM 2 prompts to coax PaLM 2 to return a valid JSON object.")
+    st.session_state['LLM'] = st.selectbox('Set LLM', llm_options, index=llm_options.index(st.session_state.get('LLM', 'gpt')))
 
+    st.write('---')
     # Instructions Section
     st.header("Instructions")
     st.write("These are the general instructions that guide the LLM through the transcription task. We recommend using the default instructions unless you have a specific reason to change them.")

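A standalone sketch of the new author and LLM widgets, runnable with streamlit run; the labels mirror the diff, while the session-state bootstrapping is simplified for the example:

```python
import streamlit as st

# Simplified defaults; in app.py these are set in build_LLM_prompt_config().
for key, val in {'default_prompt_author': 'unknown',
                 'default_prompt_author_institution': 'unknown',
                 'default_prompt_description': 'unknown'}.items():
    if key not in st.session_state:
        st.session_state[key] = val

st.header("Prompt Author Information")
author = st.text_input("Enter names of prompt author(s)",
                       value=st.session_state['default_prompt_author'])
institution = st.text_input("Enter name of institution",
                            value=st.session_state['default_prompt_author_institution'])
description = st.text_input("Enter description of prompt",
                            value=st.session_state['default_prompt_description'])

llm_options = ['gpt', 'palm']
# llm_options.index() raises ValueError if session state ever holds a value
# outside llm_options; the .get(..., 'gpt') fallback only covers a missing key.
llm = st.selectbox('Set LLM', llm_options,
                   index=llm_options.index(st.session_state.get('LLM', 'gpt')))

st.json({'prompt_author': author,
         'prompt_author_institution': institution,
         'prompt_description': description,
         'LLM': llm})
```
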
@@ -1114,11 +1148,14 @@ The desired null value is also given. Populate the field with the null value of
     with col_prompt_main_right:
         st.subheader('All Prompt Components')
         st.session_state['prompt_info'] = {
+            'prompt_author': st.session_state['prompt_author'],
+            'prompt_author_institution': st.session_state['prompt_author_institution'],
+            'prompt_description': st.session_state['prompt_description'],
+            'LLM': st.session_state['LLM'],
             'instructions': st.session_state['instructions'],
             'json_formatting_instructions': st.session_state['json_formatting_instructions'],
             'rules': st.session_state['rules'],
             'mapping': st.session_state['mapping'],
-            'LLM': st.session_state['LLM']
         }
         st.json(st.session_state['prompt_info'])