rianders committed on
Commit
9831243
1 Parent(s): 4b8c5b3

Functioning pages added

app.py CHANGED
@@ -1,39 +1,44 @@
 import streamlit as st
-from start_page import main as start_page
-
-# Import other pages. Assume each has a main function to run the page.
-from pages.data_source_config import main as data_source_config
-from pages.data_loading import main as data_loading
-# Add imports for other pages similarly...
-
-# Initialize session state for page navigation if not already set
-if 'page' not in st.session_state:
-    st.session_state.page = 'start_page'
-
-# Define a function to change the page
-def change_page(page_name):
-    st.session_state.page = page_name
-
-# Page selection (could also use st.sidebar for these)
-st.sidebar.title("Navigation")
-st.sidebar.button("Start Page", on_click=change_page, args=('start_page',))
-st.sidebar.button("Web and File Resource Configuration", on_click=change_page, args=('file_web_source_collection',))
-st.sidebar.button("Data Source Configuration", on_click=change_page, args=('data_source_config',))
-st.sidebar.button("Data Loading", on_click=change_page, args=('data_loading',))
-# Add buttons for other pages similarly...
-
-# Page dispatch
-if st.session_state.page == 'start_page':
-    start_page()
-elif st.session_state.page == 'data_source_config':
-    data_source_config()
-elif st.session_state.page == 'data_loading':
-    data_loading()
-elif st.session_state.page == 'model_selection':
-    model_selection()
-elif st.session_state.page == 'processing_embedding':
-    processing_embedding()
-
-
-
-# The above could be optimized by mapping page names to functions
+import os
+
+st.set_page_config(page_title='Knowledge Navigator', layout='wide')
+
+def main():
+    st.title('Knowledge Navigator')
+
+    # Button to go back to Data Collection Page
+    if st.button('Go to Data Collection'):
+        st.switch_page('pages/01_data_collection.py')
+
+    # Button to navigate to Data Organization Page and pass data
+    if st.button('Go to Data Organization with Data'):
+        # Navigating to Data Organization Page
+        st.switch_page('pages/02_data_organization.py')
+
+    if st.button('Proceed to Model Selection'):
+        st.switch_page('pages/03_model_selection.py')
+
+    if st.button('Proceed to encoding vector storage'):
+        st.switch_page('pages/04_encoding_storage.py')
+
+    if st.button('Proceed to Q&A Testing'):
+        st.switch_page('pages/05_testing_qa.py')
+
+    # Check if 'data' state variable is defined
+    if 'data' in st.session_state:
+        st.write("Data Available")
+        st.write("Data (URL dataframe) is defined.")
+    else:
+        st.write("Data (URL dataframe) is not defined.")
+
+    # Check if 'docs' state variable is defined
+    if 'docs' in st.session_state:
+        st.write("Docs (fetched and stored data collection) is defined.")
+    else:
+        st.write("Docs (fetched and stored data collection) is not defined.")
+
+    # Render the navigation menu
+    # menu()
+
+if __name__ == '__main__':
+    main()
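The removed block's closing comment suggests mapping page names to functions instead of the if/elif chain. A minimal sketch of that dict-based dispatch, assuming the same page modules and their main entry points as in the removed code:

import streamlit as st
from start_page import main as start_page
from pages.data_source_config import main as data_source_config
from pages.data_loading import main as data_loading

# Map page names to their entry points; adding a page becomes one dict entry
PAGES = {
    'start_page': start_page,
    'data_source_config': data_source_config,
    'data_loading': data_loading,
}

# Look up the current page and call it, falling back to the start page
PAGES.get(st.session_state.get('page', 'start_page'), start_page)()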
pages/01_data_collection.py ADDED
@@ -0,0 +1,97 @@
+import streamlit as st
+
+import pandas as pd
+import requests
+from bs4 import BeautifulSoup
+from urllib.parse import urljoin, urlparse
+from datetime import datetime
+
+def find_linked_urls_and_title(url):
+    try:
+        response = requests.get(url)
+        if response.status_code == 200:
+            soup = BeautifulSoup(response.text, 'html.parser')
+            links = soup.find_all('a')
+            urls = {link.get('href') for link in links if link.get('href') is not None}
+            title_tag = soup.find('title')
+            page_title = title_tag.text if title_tag else 'No Title Found'
+            return urls, page_title
+        else:
+            st.write(f"Failed to retrieve {url}")
+            return set(), 'No Title Found'
+    except Exception as e:
+        st.write(f"An error occurred with {url}: {e}")
+        return set(), 'No Title Found'
+
+def convert_to_absolute_urls(base_url, links):
+    return {urljoin(base_url, link) if not link.startswith('http') else link for link in links}
+
+def categorize_links(base_url, links):
+    internal_links, external_links = set(), set()
+    for link in links:
+        if urlparse(link).netloc == urlparse(base_url).netloc:
+            internal_links.add(link)
+        else:
+            external_links.add(link)
+    return internal_links, external_links
+
+def display_editable_table(df):
+    edited_df = st.data_editor(data=df, key="data_editor_key", num_rows="dynamic")  # num_rows="dynamic" allows adding/deleting rows
+    return edited_df
+
+def prepare_dataframe(df):
+    if "Ignore" not in df.columns:
+        df["Ignore"] = False  # Initialize all values as False
+    return df
+
+def store_data(df):
+    st.session_state['data'] = df
+
+def main():
+    # menu()
+
+    st.title("Data Source Configuration")
+
+    # Initialize 'scanned_urls' with all columns, including 'Ignore'
+    if 'scanned_urls' not in st.session_state:
+        st.session_state['scanned_urls'] = pd.DataFrame(columns=['URL', 'Type', 'Page Name', 'Scanned DateTime', 'Ignore'])
+
+    st.subheader("Scan Websites for URLs")
+    url_input = st.text_area("Enter URLs to scan, separated by new lines:", "https://fubarlabs.org")
+    url_list = [url.strip() for url in url_input.strip().split('\n') if url.strip()]
+    scan_button_clicked = st.button("Scan URLs")
+
+    if scan_button_clicked:
+        for url in url_list:
+            unique_urls, page_title = find_linked_urls_and_title(url)
+            scan_datetime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+            absolute_urls = convert_to_absolute_urls(url, unique_urls)
+            internal_links, external_links = categorize_links(url, absolute_urls)
+
+            new_entries = pd.DataFrame([(url, 'Internal', page_title, scan_datetime, False) for url in internal_links] +
+                                       [(url, 'External', page_title, scan_datetime, False) for url in external_links],
+                                       columns=['URL', 'Type', 'Page Name', 'Scanned DateTime', 'Ignore'])  # Include 'Ignore' column
+            st.session_state['scanned_urls'] = pd.concat([st.session_state['scanned_urls'], new_entries]).drop_duplicates().reset_index(drop=True)
+        store_data(st.session_state['scanned_urls'])
+
+    if not st.session_state['scanned_urls'].empty:
+        # Prepare the dataframe; this now includes the 'Ignore' column from the start
+        prepared_df = prepare_dataframe(st.session_state['scanned_urls'])
+
+        # Display the editable table with an "Ignore" column
+        edited_df = display_editable_table(prepared_df)
+
+        if edited_df is not None:
+            st.session_state['scanned_urls'] = edited_df
+
+        # Access the edits made to the table
+        if "data_editor_key" in st.session_state:
+            edits = st.session_state["data_editor_key"]
+            st.write("Edits made to the table:")
+            st.write(edits)
+
+    if st.button('Proceed to Data Organization'):
+        st.switch_page('pages/02_data_organization.py')
+
+if __name__ == "__main__":
+    main()
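For reference, the absolute-URL conversion and internal/external split above can be exercised standalone; a minimal sketch with a hypothetical link set:

from urllib.parse import urljoin, urlparse

base_url = "https://fubarlabs.org"
links = {"/events", "https://fubarlabs.org/about", "https://example.com/post"}

# Resolve relative links against the base, as convert_to_absolute_urls does
absolute = {urljoin(base_url, link) if not link.startswith('http') else link for link in links}

# Split on netloc, as categorize_links does
internal = {link for link in absolute if urlparse(link).netloc == urlparse(base_url).netloc}
external = absolute - internal

print(internal)  # {'https://fubarlabs.org/events', 'https://fubarlabs.org/about'}
print(external)  # {'https://example.com/post'}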
pages/02_data_organization.py ADDED
@@ -0,0 +1,75 @@
+# 02_data_organization.py
+import streamlit as st
+from langchain_community.document_loaders import AsyncHtmlLoader
+from langchain.schema import Document
+import json
+from typing import Iterable
+import asyncio
+from urllib.parse import urlparse
+
+# Async fetch function
+async def fetch_documents(urls):
+    loader = AsyncHtmlLoader(urls)
+    docs = await loader.aload()
+    return docs
+
+def save_docs_to_jsonl(array: Iterable[Document], file_path: str) -> None:
+    with open(file_path, 'w') as jsonl_file:
+        for doc in array:
+            if hasattr(doc, 'to_dict'):
+                jsonl_file.write(json.dumps(doc.to_dict()) + '\n')
+            else:
+                jsonl_file.write(json.dumps(doc.__dict__) + '\n')
+
+def load_docs_from_jsonl(file_path) -> Iterable[Document]:
+    array = []
+    with open(file_path, 'r') as jsonl_file:
+        for line in jsonl_file:
+            data = json.loads(line)
+            obj = Document(**data)
+            array.append(obj)
+    return array
+
+def is_valid_url(url):
+    try:
+        result = urlparse(url)
+        return all([result.scheme, result.netloc])
+    except ValueError:
+        return False
+
+def fetch_clean_organize_page():
+    st.title("Fetch, Clean, and Organize Documents")
+
+    # Check if 'data' exists in the session state
+    if 'data' not in st.session_state:
+        st.warning("No data found. Please go back to the previous page and scan URLs first.")
+        return
+
+    data = st.session_state['data']
+    st.write("URLs to fetch and clean:")
+    st.write(data)
+
+    # Filter out URLs marked as "Ignore" and invalid URLs
+    valid_urls = data[(data['Ignore'] == False) & (data['URL'].apply(is_valid_url))]['URL'].tolist()
+
+    if st.button("Fetch Documents"):
+        docs = asyncio.run(fetch_documents(valid_urls))
+        st.session_state['docs'] = docs
+        st.write(f"Fetched {len(st.session_state['docs'])} documents.")
+
+    if 'docs' in st.session_state:
+        if st.button("Save Documents as JSON"):
+            save_docs_to_jsonl(st.session_state['docs'], "documents.jsonl")
+            st.success("Documents saved as JSON.")
+
+            # Provide download link (streamlit >= 0.88.0)
+            with open("documents.jsonl", "rb") as file:
+                btn = st.download_button(
+                    label="Download JSON",
+                    data=file,
+                    file_name="documents.jsonl",
+                    mime="application/octet-stream"
+                )
+
+# Assuming this function is called in your app
+fetch_clean_organize_page()
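As a sanity check, the JSONL round trip above can be exercised outside the app; a minimal sketch, assuming a pydantic-v1-style Document whose .dict() fields serialize cleanly (the page code probes to_dict/__dict__ instead):

from langchain.schema import Document
import json

docs = [Document(page_content="hello world", metadata={"source": "https://fubarlabs.org"})]

# Write one JSON object per line, mirroring save_docs_to_jsonl
with open("documents.jsonl", "w") as f:
    for doc in docs:
        f.write(json.dumps(doc.dict()) + "\n")

# Read back, mirroring load_docs_from_jsonl
with open("documents.jsonl") as f:
    loaded = [Document(**json.loads(line)) for line in f]

assert loaded[0].page_content == "hello world"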
pages/03_model_selection.py ADDED
@@ -0,0 +1,50 @@
+import streamlit as st
+
+st.title('Model Selection')
+
+# Introduction
+st.write("Select the embedding model and the large language model (LLM) for processing.")
+
+# Embedding Model Selection
+embedding_models = ["thenlper/gte-small", "sentence-transformers/all-MiniLM-L6-v2", "other"]
+selected_embedding_model = st.selectbox("Select Embedding Model", options=embedding_models)
+
+# LLM Model Selection
+llm_models = ["mistralai/Mistral-7B-Instruct-v0.2", "gpt-3.5-turbo", "other"]
+selected_llm_model = st.selectbox("Select LLM Model", options=llm_models)
+
+# Display selections (for demonstration)
+st.write("Selected Embedding Model:", selected_embedding_model)
+st.write("Selected LLM Model:", selected_llm_model)
+
+# Configuration options for the selected models
+st.header("Model Configuration")
+
+# Embedding Model Configuration (example)
+if selected_embedding_model == "thenlper/gte-small":
+    # Placeholder for model-specific configuration options
+    st.write("No additional configuration required for this model.")
+else:
+    # Configuration for other models
+    st.write("Configuration options for other models will appear here.")
+
+# LLM Model Configuration (example)
+if selected_llm_model == "mistralai/Mistral-7B-Instruct-v0.2":
+    max_tokens = st.slider("Max Tokens", min_value=100, max_value=1000, value=250)
+    temperature = st.slider("Temperature", min_value=0.0, max_value=1.0, value=0.7, step=0.01)
+else:
+    # Configuration for other models; fall back to defaults so saving below
+    # never hits a NameError when no sliders were rendered
+    max_tokens, temperature = 250, 0.7
+    st.write("Configuration options for other models will appear here.")
+
+# Save model selections and configurations
+if st.button("Save Model Configuration"):
+    st.session_state['selected_embedding_model'] = selected_embedding_model
+    st.session_state['selected_llm_model'] = selected_llm_model
+
+    # Assuming configurations are more complex and vary per model, you might want to store them differently
+    st.session_state['llm_model_config'] = {"max_tokens": max_tokens, "temperature": temperature}
+
+    st.success("Model configurations saved.")
+
+if st.button('Proceed to encoding vector storage'):
+    st.switch_page('pages/04_encoding_storage.py')
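The comment about storing configurations differently per model could look like the following; a minimal sketch, where `llm_model_configs` is a hypothetical session key, not one the pages above use:

import streamlit as st

selected_llm_model = "mistralai/Mistral-7B-Instruct-v0.2"  # would come from the selectbox

# Keep one settings dict per model, so switching models never
# clobbers another model's saved configuration
if 'llm_model_configs' not in st.session_state:
    st.session_state['llm_model_configs'] = {}

st.session_state['llm_model_configs'][selected_llm_model] = {
    "max_tokens": 250,
    "temperature": 0.7,
}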
pages/04_encoding_storage.py ADDED
@@ -0,0 +1,122 @@
+import streamlit as st
+from langchain_community.vectorstores import FAISS
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.llms import HuggingFaceEndpoint
+from langchain.schema import Document
+import json
+from typing import Iterable
+import os
+from datetime import datetime
+import zipfile
+import tempfile
+
+def save_docs_to_jsonl(array: Iterable[Document], file_path: str) -> None:
+    with open(file_path, 'w') as jsonl_file:
+        for doc in array:
+            jsonl_file.write(doc.json() + '\n')
+
+def load_docs_from_jsonl(file) -> Iterable[Document]:
+    array = []
+    for line in file:
+        data = json.loads(line.decode('utf-8'))
+        obj = Document(**data)
+        array.append(obj)
+    return array
+
+st.title('Encoding and Storage')
+
+# Create output directory
+start_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+OUTPUT_DIR = "./out"
+
+# Check if the directory exists, and if not, create it
+if not os.path.exists(OUTPUT_DIR):
+    os.makedirs(OUTPUT_DIR)
+    st.write(f"Directory '{OUTPUT_DIR}' was created.")
+else:
+    st.write(f"Directory '{OUTPUT_DIR}' already exists.")
+
+# Allow the user to upload the JSONL file if missing
+if 'docs' not in st.session_state:
+    st.write("Document collection not found in session state.")
+    uploaded_file = st.file_uploader("Upload JSONL file", type=["jsonl"])
+    if uploaded_file is not None:
+        try:
+            docs = load_docs_from_jsonl(uploaded_file)
+            st.session_state['docs'] = docs
+            st.write(f"Loaded {len(docs)} documents from the uploaded file.")
+        except Exception as e:
+            st.error(f"Error loading JSONL file: {str(e)}")
+else:
+    docs = st.session_state['docs']
+    st.write(f"Loaded {len(docs)} documents from the session state.")
+
+# Show the embedding model
+EMBEDDING_MODEL_NAME = st.session_state.get('selected_embedding_model', "thenlper/gte-small")
+st.write(f"Selected Embedding Model: {EMBEDDING_MODEL_NAME}")
+
+# Allow the user to select the device (GPU or CPU)
+device_form = st.form(key='device_form')
+device = device_form.radio("Select Device", ("CUDA", "CPU"))
+submit_device = device_form.form_submit_button(label='Submit Device')
+
+if submit_device:
+    # Set up the embedding model
+    embedding_model = HuggingFaceEmbeddings(
+        model_name=EMBEDDING_MODEL_NAME,
+        multi_process=True,
+        model_kwargs={"device": device.lower()},
+        encode_kwargs={"normalize_embeddings": True},  # set True for cosine similarity
+    )
+
+    # Show the configuration
+    st.write("Embedding Model Configuration:")
+    st.write(embedding_model)
+
+    # Start the encoding
+    if 'docs' in st.session_state:
+        progress_bar = st.progress(0)
+        total_docs = len(docs)
+
+        collection_vectorstore = FAISS.from_documents(docs, embedding=embedding_model)
+        st.session_state['collection_vectorstore'] = collection_vectorstore
+
+        for i in range(total_docs):
+            progress_bar.progress((i + 1) / total_docs)
+
+        st.write("Encoding completed.")
+    else:
+        st.write("No documents found in the session state.")
+
+# Allow saving and downloading the configuration
+if st.button("Save and Download Configuration"):
+    if 'collection_vectorstore' in st.session_state:
+        collection_vectorstore = st.session_state['collection_vectorstore']
+        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+        zip_filename = f"docs_vectors_{timestamp}.zip"
+
+        with tempfile.TemporaryDirectory() as temp_dir:
+            collection_vectorstore.save_local(f"{temp_dir}/docs_vectors")
+
+            with zipfile.ZipFile(zip_filename, "w") as zip_file:
+                for root, _, files in os.walk(temp_dir):
+                    for file in files:
+                        file_path = os.path.join(root, file)
+                        zip_file.write(file_path, os.path.relpath(file_path, temp_dir))
+
+        with open(zip_filename, "rb") as zip_file:
+            zip_bytes = zip_file.read()
+
+        st.download_button(
+            label="Download Configuration",
+            data=zip_bytes,
+            file_name=zip_filename,
+            mime="application/zip",
+        )
+
+        st.success("Configuration saved and downloaded.")
+    else:
+        st.warning("No vector store found. Please make sure the encoding is completed.")
+
+if st.button('Proceed to Q&A Testing'):
+    st.switch_page('pages/05_testing_qa.py')
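For reference, the saved index can be reloaded the same way the Q&A page does; a minimal sketch, assuming the downloaded zip has been extracted so that the docs_vectors folder sits on disk:

from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

embedding_model = HuggingFaceEmbeddings(model_name="thenlper/gte-small")

# load_local mirrors save_local; the flag opts in to pickle deserialization
vectorstore = FAISS.load_local(
    "docs_vectors",
    embeddings=embedding_model,
    allow_dangerous_deserialization=True,
)

retriever = vectorstore.as_retriever()
print(retriever.invoke("What is the Knowledge Navigator?"))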
pages/05_testing_qa.py ADDED
@@ -0,0 +1,111 @@
+import streamlit as st
+from langchain_community.vectorstores import FAISS
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.llms import HuggingFaceEndpoint
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.runnables import RunnablePassthrough
+from langchain_core.output_parsers import StrOutputParser
+import tempfile
+import zipfile
+import os
+
+st.title('Testing and QA')
+
+# Dynamically load the selected models from the session state
+EMBEDDING_MODEL_NAME = st.session_state.get('selected_embedding_model', "thenlper/gte-small")
+LLM_MODEL_NAME = st.session_state.get('selected_llm_model', "mistralai/Mistral-7B-Instruct-v0.2")
+
+# Initialization block for embedding_model, with a debug message
+if 'embedding_model' not in st.session_state:
+    st.session_state['embedding_model'] = HuggingFaceEmbeddings(
+        model_name=EMBEDDING_MODEL_NAME,
+        multi_process=True,
+        model_kwargs={"device": "cpu"},
+        encode_kwargs={"normalize_embeddings": True},
+    )
+    st.info("embedding_model has been initialized.")  # Debug message for initialization
+else:
+    st.info("embedding_model was already initialized.")  # Debug message if already initialized
+
+# Now that we've ensured embedding_model is initialized, we can safely access it
+embedding_model = st.session_state['embedding_model']
+st.write("Accessing embedding_model...")  # Debug message for accessing
+
+# Form for LLM settings, allowing dynamic model selection
+with st.form("llm_settings_form"):
+    st.subheader("LLM Settings")
+    repo_id = st.text_input("Repo ID", value=LLM_MODEL_NAME, key="repo_id")
+    max_new_tokens = st.number_input("Max New Tokens", value=250, key="max_new_tokens")
+    top_k = st.number_input("Top K", value=3, key="top_k")
+    top_p = st.number_input("Top P", value=0.95, key="top_p")
+    typical_p = st.number_input("Typical P", value=0.95, key="typical_p")
+    temperature = st.number_input("Temperature", value=0.01, key="temperature")
+    repetition_penalty = st.number_input("Repetition Penalty", value=1.035, key="repetition_penalty")
+
+    submitted = st.form_submit_button("Update LLM Settings")
+    if submitted:
+        st.session_state['llm'] = HuggingFaceEndpoint(
+            repo_id=repo_id,
+            max_new_tokens=max_new_tokens,
+            top_k=top_k,
+            top_p=top_p,
+            typical_p=typical_p,
+            temperature=temperature,
+            repetition_penalty=repetition_penalty,
+        )
+        st.success("LLM settings updated.")
+
+# Vector store upload and setup
+if 'collection_vectorstore' not in st.session_state:
+    uploaded_file = st.file_uploader("Upload Vector Store ZIP", type=["zip"])
+    if uploaded_file is not None:
+        with tempfile.TemporaryDirectory() as temp_dir:
+            with zipfile.ZipFile(uploaded_file, 'r') as zip_ref:
+                zip_ref.extractall(temp_dir)
+            docs_vectors_path = os.path.join(temp_dir, "docs_vectors")
+            st.session_state['collection_vectorstore'] = FAISS.load_local(docs_vectors_path, embeddings=embedding_model, allow_dangerous_deserialization=True)
+            st.success("Vector store uploaded and loaded successfully.")
+
+            # Create the retriever as soon as the vector store is created
+            st.session_state['retriever'] = st.session_state['collection_vectorstore'].as_retriever()
+            st.info("Retriever has been created.")  # Debug message to confirm the retriever's creation
+
+# Check if LLM and vector store are ready
+if 'llm' in st.session_state and 'collection_vectorstore' in st.session_state:
+    # Ensure there's a default prompt template
+    if 'prompt_template' not in st.session_state:
+        st.session_state['prompt_template'] = "You are a knowledgeable assistant answering the following question based on the provided documents: {context} Question: {question}"
+
+    # Display the current template for editing
+    current_template = st.text_area("Edit Prompt Template", value=st.session_state['prompt_template'], key="current_prompt_template")
+
+    # Persist the edits only when the button is pressed; rendering the text
+    # area first ensures the button never overwrites the template with an
+    # empty value
+    if st.button("Update Prompt Template"):
+        st.session_state['prompt_template'] = current_template
+        st.success("Prompt template updated.")
+
+    # Question input and processing
+    question = st.text_input("Enter your question", key="question_input")
+
+    if question:
+        llm = st.session_state['llm']
+        prompt = ChatPromptTemplate.from_template(current_template)
+        retriever = st.session_state['retriever']
+        chain = (
+            {"context": retriever, "question": RunnablePassthrough()}
+            | prompt
+            | llm
+            | StrOutputParser()
+        )
+
+        if st.button("Ask"):
+            result = chain.invoke(question)
+            st.subheader("Answer:")
+            st.write(result)
+else:
+    st.warning("Please configure and submit the LLM settings and ensure the vector store is loaded to ask questions.")
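The chain above follows the standard LCEL retrieval pattern: the leading dict fans the question out to the retriever (filling {context}) and passes it through unchanged (filling {question}) before it reaches the prompt. A minimal sketch of that wiring with a stand-in retriever, so it runs without an endpoint or index:

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

def fake_retriever(question: str) -> str:
    # Stand-in for the FAISS retriever: returns canned "documents"
    return "Doc 1: The Knowledge Navigator scans sites, fetches pages, and indexes them."

prompt = ChatPromptTemplate.from_template(
    "Answer based on the provided documents: {context} Question: {question}"
)

# The dict runs both branches on the same input, then fills the prompt
chain = {"context": RunnableLambda(fake_retriever), "question": RunnablePassthrough()} | prompt

print(chain.invoke("What does the app do?").to_string())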