Entz committed
Commit 307a717
1 Parent(s): e97d234

Upload 5 files

Files changed (5)
  1. app.py +63 -53
  2. qa.db +0 -0
  3. requirements.txt +6 -4
  4. tab1_intro.txt +23 -0
  5. tab2_pe.txt +24 -0
app.py CHANGED
@@ -1,23 +1,56 @@
 import streamlit as st
 import pandas as pd
 import sqlite3
+from llama_index.core import StorageContext, load_index_from_storage
 from llama_index.llms.ollama import Ollama
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.core import PromptTemplate
+import os
+
+version = 2.2
 
 # Initialize the SQLite3 database
 conn = sqlite3.connect('qa.db')
 c = conn.cursor()
-c.execute('CREATE TABLE IF NOT EXISTS qa (question TEXT, answer TEXT)')
+# Update the table creation to include the version column
+c.execute('CREATE TABLE IF NOT EXISTS qa (question TEXT, answer TEXT, version REAL)')
 conn.commit()
 
-
-# Initialize the Ollama object
-@st.cache_resource ### This caches the model loading function
-def load_ollama_model():
-    return Ollama(model="mistral", request_timeout=30.0)
+# Read the LLM Model Description from a file
+def read_description_from_file(file_path):
+    with open(file_path, 'r') as file:
+        return file.read()
+
+# Define the folder containing the saved index
+INDEX_OUTPUT_PATH = "./output_index"
+
+# Ensure the output directory exists
+if not os.path.exists(INDEX_OUTPUT_PATH):
+    raise ValueError(f"Index directory {INDEX_OUTPUT_PATH} does not exist")
+
+# Setup LLM and embedding model
+llm = Ollama(model="llama3", request_timeout=120.0)
+embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5", trust_remote_code=True)
+
+# To load the index later, set up the storage context
+storage_context = StorageContext.from_defaults(persist_dir=INDEX_OUTPUT_PATH)
+loaded_index = load_index_from_storage(embed_model=embed_model, storage_context=storage_context)
+
+# Define a query engine (assuming it needs the LLM and embedding model)
+query_engine = loaded_index.as_query_engine(llm=llm, embed_model=embed_model)
+
+# Customise prompt template
+# Read the prompt template from a file
+qa_prompt_tmpl_str = read_description_from_file("tab2_pe.txt")
+qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)
+
+query_engine.update_prompts(
+    {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
+)
 
 # Save the question and answer to the SQLite3 database
-def save_to_db(question, answer):
-    c.execute('INSERT INTO qa (question, answer) VALUES (?, ?)', (question, answer))
+def save_to_db(question, answer, version):
+    c.execute('INSERT INTO qa (question, answer, version) VALUES (?, ?, ?)', (question, answer, version))
     conn.commit()
 
 # Fetch all data from the SQLite3 database
@@ -32,56 +65,35 @@ def main():
 
     with tab1:
         st.subheader("LLM Model Description")
-        st.write("""
-        Welcome to our pilot program that aims to test and evaluate the capabilities of the Mistral 7B model in understanding and providing information about the Wandsworth Council. This application leverages the **Ollama** framework, specifically utilizing the advanced 'mistral' architecture to handle question and answer tasks.
-
-        ### What is the Mistral 7B Model?
-        The Mistral 7B model is a sophisticated language model designed to understand and generate human-like text. It is part of a new generation of models developed to handle complex queries with a high degree of accuracy. This model is trained on a diverse dataset, enabling it to provide insightful and accurate answers to a wide range of questions.
-
-        ### About Ollama
-        **Ollama** is a cutting-edge framework that facilitates the deployment and use of large language models (LLMs). By integrating the Mistral 7B model within the Ollama framework, this application can efficiently process and respond to user queries.
-
-        ### How Does It Work?
-        - **Ask a Question**: Users can input their questions regarding the Wandsworth Council in the "Ask a Question" tab. The Mistral 7B model will process the query and generate a relevant response based on its extensive training data.
-        - **View Q&A History**: All user queries and the corresponding answers are stored in a database, allowing us to analyze the performance of the model over time. Users can view the history of questions and answers in the "View Q&A History" tab.
-
-        ### Objectives
-        The primary goal of this pilot program is to assess how well the Mistral 7B model can handle queries related to local government information, specifically focusing on the Wandsworth Council. By collecting and analyzing user queries, we aim to:
-        - Identify strengths and weaknesses in the model’s responses.
-        - Improve the model’s accuracy and reliability in providing information about local council services.
-        - Explore the potential of LLMs in supporting local government communications and citizen engagement.
-
-        We encourage users to ask diverse and challenging questions to help us test the limits of the Mistral 7B model’s knowledge and understanding. Your participation and feedback are invaluable to the success of this program.
-
-        Thank you for being part of this exciting journey to explore the capabilities of advanced language models in serving local communities!
-
-        Last Update: 23rd June 2024
-        """)
+        description = read_description_from_file("tab1_intro.txt")
+        st.write(description)
 
     with tab2:
         st.subheader("Ask a Question")
         question = st.text_input("Enter your question:")
         if st.button("Get Answer"):
            if question:
-                # each time when it run tab2, this load_ollama_model won't be reload, thx to st.cache_resource
-                llm = load_ollama_model()
-                response = llm.complete(question)
-
-                ### # Print response for debugging
-                ### st.write("Debug: Response object")
-                ### st.write(response)
-
-                # Try to extract the generated text
                 try:
-                    answer = response.text
-                except AttributeError as e:
-                    st.error(f"Error extracting text from response: {e}")
-                    answer = "Sorry, could not generate an answer."
-
-                st.write(f"**Answer:** {answer}")
-
-                # Save question and answer to database
-                save_to_db(question, answer)
+                    response = query_engine.query(question)
+
+                    # Try to extract the generated text
+                    try:
+                        # Extract the text from the response object (assuming it has a `text` attribute or method)
+                        if hasattr(response, 'text'):
+                            answer = response.text
+                        else:
+                            answer = str(response)
+
+                    except AttributeError as e:
+                        st.error(f"Error extracting text from response: {e}")
+                        answer = "Sorry, could not generate an answer."
+
+                    st.write(f"**Answer:** {answer}")
+
+                    # Save question and answer to database
+                    save_to_db(question, answer, version)
+                except Exception as e:
+                    st.error(f"An error occurred: {e}")
            else:
                st.warning("Please enter a question")
 
@@ -89,12 +101,10 @@ def main():
         st.subheader("View Q&A History")
         qa_data = fetch_from_db()
         if qa_data:
-            df = pd.DataFrame(qa_data, columns=["Question", "Answer"])
+            df = pd.DataFrame(qa_data, columns=["Question", "Answer", "Version"])
            st.dataframe(df)
        else:
            st.write("No data available")
 
 if __name__ == "__main__":
-    debug = True # Set to False to disable debugging
-
     main()
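A note on the refactor above: the previous version loaded the model inside a function decorated with `@st.cache_resource`, while this version builds the Ollama LLM, the HuggingFace embedding model, and the index at module level, so Streamlit re-executes that setup on every rerun of the script. Below is a minimal sketch of how the heavyweight setup could be cached again under the new structure; the helper name `load_query_engine` is illustrative and not part of this commit.

```python
import streamlit as st
from llama_index.core import StorageContext, load_index_from_storage
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama

INDEX_OUTPUT_PATH = "./output_index"

@st.cache_resource  # built once per process, reused across Streamlit reruns
def load_query_engine():
    # Illustrative helper: mirrors the module-level setup in app.py above.
    llm = Ollama(model="llama3", request_timeout=120.0)
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")
    storage_context = StorageContext.from_defaults(persist_dir=INDEX_OUTPUT_PATH)
    index = load_index_from_storage(storage_context, embed_model=embed_model)
    return index.as_query_engine(llm=llm, embed_model=embed_model)
```

Inside `main()`, `query_engine = load_query_engine()` would then return the cached engine instead of rebuilding the models and index on each interaction.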
qa.db CHANGED
Binary files a/qa.db and b/qa.db differ
 
requirements.txt CHANGED
@@ -1,4 +1,6 @@
-Streamlit
-pandas
-llama-index
-llama-index-llms-ollama
+streamlit==1.36.0
+pandas==2.2.2
+llama_index==0.10.50
+transformers==4.41.2
+llama_index.llms.ollama
+llama_index.embeddings.huggingface
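For reference, this file installs with `pip install -r requirements.txt`. pip normalizes requirement names (PEP 503 treats `.`, `_`, and `-` as equivalent), so the dotted entries `llama_index.llms.ollama` and `llama_index.embeddings.huggingface` should resolve to the same PyPI distributions as the hyphenated `llama-index-llms-ollama` the old file used.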
tab1_intro.txt ADDED
@@ -0,0 +1,23 @@
+Welcome to our pilot program that aims to test and evaluate the capabilities of the Mistral 7B model in understanding and providing information about the Wandsworth Council. This application leverages the **Ollama** framework, specifically utilizing the advanced 'mistral' architecture to handle question and answer tasks.
+
+### What is the Mistral 7B Model?
+The Mistral 7B model is a sophisticated language model designed to understand and generate human-like text. It is part of a new generation of models developed to handle complex queries with a high degree of accuracy. This model is trained on a diverse dataset, enabling it to provide insightful and accurate answers to a wide range of questions.
+
+### About Ollama
+**Ollama** is a cutting-edge framework that facilitates the deployment and use of large language models (LLMs). By integrating the Mistral 7B model within the Ollama framework, this application can efficiently process and respond to user queries.
+
+### How Does It Work?
+- **Ask a Question**: Users can input their questions regarding the Wandsworth Council in the "Ask a Question" tab. The Mistral 7B model will process the query and generate a relevant response based on its extensive training data.
+- **View Q&A History**: All user queries and the corresponding answers are stored in a database, allowing us to analyze the performance of the model over time. Users can view the history of questions and answers in the "View Q&A History" tab.
+
+### Objectives
+The primary goal of this pilot program is to assess how well the Mistral 7B model can handle queries related to local government information, specifically focusing on the Wandsworth Council. By collecting and analyzing user queries, we aim to:
+- Identify strengths and weaknesses in the model’s responses.
+- Improve the model’s accuracy and reliability in providing information about local council services.
+- Explore the potential of LLMs in supporting local government communications and citizen engagement.
+
+We encourage users to ask diverse and challenging questions to help us test the limits of the Mistral 7B model’s knowledge and understanding. Your participation and feedback are invaluable to the success of this program.
+
+Thank you for being part of this exciting journey to explore the capabilities of advanced language models in serving local communities!
+
+Last Update: 23rd June 2024
tab2_pe.txt ADDED
@@ -0,0 +1,24 @@
+Context information is below.
+---------------------
+{context_str}
+---------------------
+You work as support for residents' queries about council tax in Wandsworth.
+Even though you're a senior with more than 30 years' experience, you double-check all the facts in the documentation.
+Our documentation is absolutely up to date, so you can fully rely on it when answering questions (you don't need to check the actual content on the council webpage).
+Your work is very important for the team's success.
+You need to ensure that you provide the best possible support: answering all the questions, making no assumptions,
+and sharing only the factual content.
+Be practical; there is no need to be creative, just try your best to solve the customer's problem.
+You are not allowed to share the details of the pdf, md, or Word documents.
+You are not allowed to use short links, like bit.ly links or the like.
+You are not allowed to leave a blank or empty link, e.g. http://, https://, or 'www.example.com'.
+You are only allowed to share world wide web links starting with https://www.wandsworth.gov.uk.
+If your content is from the RAG, or from the md or pdf files, and you are going to quote the source, please only quote the source URL link at the top of each file, e.g. "source: https://www.wandsworth.gov.uk/council-tax/council-tax-discounts-exemptions-and-reductions/apply-for-a-council-tax-discount/#impaired" at the top of the file "disabled_impaired.md".
+Do not invite people to see any internal files, e.g. 'see [disabled_impaired.md]', 'pdf', 'md', etc.
+Do not show the RAG structure; do not tell the folder or directory structure.
+Do not tell anything about the documents in the RAG system. If any query asks about the data or information source, say it is from the official webpage, or just quote the source URL link at the top of each file, e.g. "source: https://www.wandsworth.gov.uk/council-tax/council-tax-discounts-exemptions-and-reductions/apply-for-a-council-tax-discount/#impaired" at the top of the file "disabled_impaired.md".
+Format each reply as an email. Try to make the reply more helpful by including the URL address.
+If the sender's name is unknown, just use 'resident'.
+Sign off with my name, Lorentz, Data Scientist.
+Query: {query_str}
+Answer:
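For context on the two curly-brace fields: `{context_str}` and `{query_str}` are the standard variables of LlamaIndex's text QA template, which the `query_engine.update_prompts` call in app.py swaps this file in for; at query time the response synthesizer substitutes the retrieved chunks and the user's question. A small sketch for previewing the rendered prompt outside the app; the two filler values are invented for illustration.

```python
from llama_index.core import PromptTemplate

# Load the same template file the app reads.
qa_prompt_tmpl = PromptTemplate(open("tab2_pe.txt").read())

# Render it the way the response synthesizer would; both values below
# are made-up stand-ins for real retrieved context and a real question.
print(qa_prompt_tmpl.format(
    context_str="source: https://www.wandsworth.gov.uk/council-tax/ (example snippet)",
    query_str="How do I apply for a single-person council tax discount?",
))
```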