mohcineelharras committed
Commit 77b04d1 • 1 Parent(s): e6e7a99

templates done

Files changed (3):
  1. README.md +2 -1
  2. app.py +114 -54
  3. data/doctest.txt +4 -2
README.md CHANGED
@@ -9,4 +9,5 @@ app_file: app.py
 pinned: false
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+How to pick chunks that are pertinent ?
+How to stream response word by word ?
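
Both questions added to the README map onto options that the query engine in this version of llama_index already exposes: similarity_top_k controls how many of the most similar chunks are retrieved, and streaming=True turns the answer into a token generator that Streamlit can render incrementally. The snippet below is a minimal sketch under that assumption; it mirrors the index construction from app.py, and the top-k value, the llm reference, and the placeholder names are illustrative rather than part of this commit.

    # Sketch (not part of this commit): retrieve only the most pertinent chunks and stream the answer.
    # Assumes the same "data" folder, local embedding model, and `llm` object used by app.py.
    import streamlit as st
    from llama_index import ServiceContext, VectorStoreIndex, SimpleDirectoryReader
    from llama_index.embeddings import InstructorEmbedding

    embed_model = InstructorEmbedding("models/hkunlp_instructor-base")
    service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=llm)  # llm as in app.py
    documents = SimpleDirectoryReader("data").load_data()
    index = VectorStoreIndex.from_documents(documents, service_context=service_context)

    query_engine = index.as_query_engine(
        similarity_top_k=3,   # keep only the 3 most similar chunks instead of the default
        streaming=True,       # yield the answer token by token instead of as one string
    )

    response = query_engine.query("How does the blockchain work ?")
    placeholder = st.empty()  # Streamlit slot rewritten as tokens arrive
    text = ""
    for token in response.response_gen:
        text += token
        placeholder.markdown(text)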
app.py CHANGED
@@ -12,6 +12,8 @@ from llama_index.embeddings import InstructorEmbedding
 from llama_index import ServiceContext, VectorStoreIndex, SimpleDirectoryReader
 from tqdm.notebook import tqdm
 from dotenv import load_dotenv
+from llama_index.llms import ChatMessage, MessageRole
+from llama_index.prompts import ChatPromptTemplate
 
 # --------------------------------env variables-----------------------------------
 
@@ -22,12 +24,92 @@ no_proxy = os.getenv("no_proxy")
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 OPENAI_API_BASE = os.getenv("OPENAI_API_BASE")
 
+# Text QA Prompt
+chat_text_qa_msgs = [
+    ChatMessage(
+        role=MessageRole.SYSTEM,
+        content=(
+            "You are Dolphin, a helpful AI assistant. "
+            "Answer questions based solely on the context provided. "
+            "Do not use information outside of the context. "
+            "Respond in the same language as the question. Be concise."
+        ),
+    ),
+    ChatMessage(
+        role=MessageRole.USER,
+        content=(
+            "Context information is below:\n"
+            "---------------------\n"
+            "{context_str}\n"
+            "---------------------\n"
+            "Based on this context, answer the question: {query_str}\n"
+        ),
+    ),
+]
+text_qa_template = ChatPromptTemplate(chat_text_qa_msgs)
+
+# Refine Prompt
+chat_refine_msgs = [
+    ChatMessage(
+        role=MessageRole.SYSTEM,
+        content=(
+            "You are Dolphin, focused on refining answers with additional context. "
+            "Use new context to refine the answer. "
+            "If the new context isn't useful, restate the original answer. "
+            "Be precise and match the language of the query."
+        ),
+    ),
+    ChatMessage(
+        role=MessageRole.USER,
+        content=(
+            "New context for refinement:\n"
+            "------------\n"
+            "{context_msg}\n"
+            "------------\n"
+            "Refine the original answer with this context for the question: {query_str}. "
+            "Original Answer: {existing_answer}"
+        ),
+    ),
+]
+
+refine_template = ChatPromptTemplate(chat_refine_msgs)
+
+template = (
+    "system\n"
+    "\"You are Dolphin, a helpful AI assistant. Your responses should be based solely on the content of documents you have access to, "
+    "including the specific context provided below. Do not provide information that is not contained in the documents or the context. "
+    "If a question is asked about content not in the documents or context, respond with 'I do not have that information.' "
+    "Always respond in the same language as the question was asked. Be concise.\n"
+    "Respond to the best of your ability. Try to respond in markdown.\"\n"
+    "context\n"
+    "{context}\n"
+    "user\n"
+    "{prompt}\n"
+    "assistant\n"
+)
+
+
 # --------------------------------cache LLM-----------------------------------
 
 logging.basicConfig(stream=sys.stdout, level=logging.INFO)
 logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
 llama_debug = LlamaDebugHandler(print_trace_on_end=True)
 callback_manager = CallbackManager([llama_debug])
+
+#One doc embedding
+def load_emb_uploaded_document(filename):
+    # You may want to add a check to prevent execution during initialization.
+    if 'init' in st.session_state:
+        embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base")
+        service_context = ServiceContext.from_defaults(embed_model=embed_model_inst, llm=llm, chunk_size_limit=500)
+        documents = SimpleDirectoryReader(input_files=[filename]).load_data()
+        index = VectorStoreIndex.from_documents(
+            documents, service_context=service_context, show_progress=True)
+        return index.as_query_engine(text_qa_template=text_qa_template, refine_template=refine_template)
+    return None
+
+
+
 # LLM
 @st.cache_resource
 def load_llm_model():
@@ -40,7 +122,7 @@ def load_llm_model():
         model_path="models/dolphin-2.1-mistral-7b.Q4_K_S.gguf",
         temperature=0.0,
         max_new_tokens=100,
-        context_window=1024,
+        context_window=2048,
         generate_kwargs={},
         model_kwargs={"n_gpu_layers": 20},
         messages_to_prompt=messages_to_prompt,
@@ -49,8 +131,6 @@ def load_llm_model():
     )
     return llm
 
-llm = load_llm_model()
-
 # --------------------------------cache Embedding model-----------------------------------
 
 @st.cache_resource
@@ -62,14 +142,13 @@ def load_emb_model():
     embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base"
         #model_name="hkunlp/instructor-base"
     )
-    service_context = ServiceContext.from_defaults(embed_model=embed_model_inst, llm=llm)
+    service_context = ServiceContext.from_defaults(embed_model=embed_model_inst,
+                                                   llm=llm)
     documents = SimpleDirectoryReader("data").load_data()
     print(f"Number of documents: {len(documents)}")
     index = VectorStoreIndex.from_documents(
        documents, service_context=service_context, show_progress=True)
-    return index.as_query_engine()
-
-query_engine = load_emb_model()
+    return index.as_query_engine(text_qa_template=text_qa_template, refine_template=refine_template)
 
 # ------------------------------------layout----------------------------------------
 
@@ -77,7 +156,7 @@ with st.sidebar:
     api_server_info = st.text_input("Local LLM API server", OPENAI_API_BASE ,key="openai_api_base")
     st.title("🤖 Llama Index 📚")
     if st.button('Clear Memory'):
-        st.session_state.memory = ""
+        del st.session_state["memory"]
     st.write("Local LLM API server in this demo is useles, we are loading local model using llama_index integration of llama cpp")
     st.write("🚀 This app allows you to chat with local LLM using api server or loaded in cache")
     st.subheader("💻 System Requirements: ")
@@ -89,43 +168,26 @@ with st.sidebar:
 
 # Define your app's tabs
 tab1, tab2, tab3 = st.tabs(["LLM only", "LLM RAG QA with database", "One single document Q&A"])
-
-# -----------------------------------LLM only---------------------------------------------
 if 'memory' not in st.session_state:
     st.session_state.memory = ""
-    #token_count = 0
+llm = load_llm_model()
+query_engine = load_emb_model()
+
+# -----------------------------------LLM only---------------------------------------------
+
 with tab1:
     st.title("💬 LLM only")
     prompt = st.text_input(
         "Ask your question here",
-        placeholder="Who is Lionel Messi",
-    )
-    template = (
-        "system\n"
-        "You are Dolphin, a helpful AI assistant. Your responses should be based solely on the content of documents you have access to. "
-        "Do not provide information that is not contained in the documents. "
-        "If a question is asked about content not in the documents, respond with 'I do not have that information.' "
-        "Always respond in the same language as the question was asked. Be concise.\n"
-        "user\n"
-        "{prompt}\n"
-        "assistant\n"
+        placeholder="Who is Mohcine",
     )
     if prompt:
         contextual_prompt = st.session_state.memory + "\n" + prompt
-        formatted_prompt = template.format(prompt=contextual_prompt)
-
-        response = llm.complete(formatted_prompt,max_tokens=100, temperature=0, top_p=0.95, top_k=10)
-        #print(response)
+        response = llm.complete(prompt,max_tokens=100, temperature=0, top_p=0.95, top_k=10)
         text_response = response
-        #---------------------------------------------
-        # text_response = response["choices"][0]["text"]
-        # token_count += response["usage"]["total_tokens"]
-        # st.write("LLM's Response:\n", text_response)
-        # st.write("Token count:\n", token_count)
-        #---------------------------------------------
-        st.write("LLM's Response:\n",text_response)
+        st.write("### Answer")
+        st.markdown(text_response)
         st.session_state.memory = f"Prompt: {contextual_prompt}\nResponse:\n {text_response}"
-        #st.write("Memory:\n", memory)
        with open("short_memory.txt", 'w') as file:
            file.write(st.session_state.memory)
 
@@ -133,34 +195,30 @@ with tab1:
 
 with tab2:
     st.title("💬 LLM RAG QA with database")
-    st.write("To consult files that are available in the database, go to https://huggingface.co/spaces/mohcineelharras/llama-index-docs-spaces/blob/main/data")
+    st.write("To consult files that are available in the database, go to https://huggingface.co/spaces/mohcineelharras/llama-index-docs-spaces/tree/main/data")
     prompt = st.text_input(
         "Ask your question here",
         placeholder="How does the blockchain work ?",
     )
     if prompt:
-        response = query_engine.query(prompt)
-        st.write("Your prompt: ", prompt)
-        st.write("LLM's Response:\n"+ response.response)
+        contextual_prompt = st.session_state.memory + "\n" + prompt
+        response = query_engine.query(contextual_prompt)
+        text_response = response.response
+        st.write("### Answer")
+        st.markdown(text_response)
+        st.session_state.memory = f"Prompt: {contextual_prompt}\nResponse:\n {text_response}"
        with st.expander("Document Similarity Search"):
            for i, node in enumerate(response.source_nodes):
                dict_source_i = node.node.metadata
                dict_source_i.update({"Text":node.node.text})
                st.write("Source n°"+str(i+1), dict_source_i)
-                st.write()
+                break
+        st.session_state.memory = f"Prompt: {contextual_prompt}\nResponse:\n {text_response}"
+        with open("short_memory.txt", 'w') as file:
+            file.write(st.session_state.memory)
 
-# -----------------------------------Upload File Q&A-----------------------------------------
 
-def load_emb_uploaded_document(filename):
-    # You may want to add a check to prevent execution during initialization.
-    if 'init' in st.session_state:
-        embed_model_inst = InstructorEmbedding("models/hkunlp_instructor-base")
-        service_context = ServiceContext.from_defaults(embed_model=embed_model_inst, llm=llm)
-        documents = SimpleDirectoryReader(input_files=[filename]).load_data()
-        index = VectorStoreIndex.from_documents(
-            documents, service_context=service_context, show_progress=True)
-        return index.as_query_engine()
-    return None
+# -----------------------------------Upload File Q&A-----------------------------------------
 
 with tab3:
     st.title("📝 One single document Q&A with Llama Index using local open llms")
@@ -190,11 +248,13 @@ with tab3:
         st.write("File ",uploaded_file.name, "was loaded successfully")
 
     if uploaded_file and question and api_server_info:
-        response = prompt = f"""Based on the context presented. Respond to the question below to the best of your ability.
-        \n\n{question}"""
-        response = query_engine.query(prompt)
+        contextual_prompt = st.session_state.memory + "\n" + question
+        response = query_engine.query(contextual_prompt)
+        text_response = response.response
         st.write("### Answer")
-        st.write(response.response)
+        st.session_state.memory = f"Prompt: {contextual_prompt}\nResponse:\n {text_response}"
+        with open("short_memory.txt", 'w') as file:
+            file.write(st.session_state.memory)
         with st.expander("Document Similarity Search"):
             #st.write(len(response.source_nodes))
             for i, node in enumerate(response.source_nodes):
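
Since the substance of this commit is the pair of ChatPromptTemplate objects passed to as_query_engine, a quick way to check what the engine will actually send to the model is to format a template by hand outside Streamlit. A minimal sketch, assuming the same llama_index prompt API imported above; the shortened message contents and the sample context and question strings are placeholders, not the exact strings from app.py.

    # Sketch: render a QA template outside the app to inspect the final prompt.
    from llama_index.llms import ChatMessage, MessageRole
    from llama_index.prompts import ChatPromptTemplate

    msgs = [
        ChatMessage(role=MessageRole.SYSTEM,
                    content="You are Dolphin, a helpful AI assistant. Answer from the context only."),
        ChatMessage(role=MessageRole.USER,
                    content="Context information is below:\n{context_str}\nAnswer the question: {query_str}\n"),
    ]
    text_qa_template = ChatPromptTemplate(msgs)

    # format() flattens the chat into a single prompt string; format_messages() keeps the chat turns.
    print(text_qa_template.format(context_str="Mohcine is a 25 year old freelancer.",
                                  query_str="Who is Mohcine ?"))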
data/doctest.txt CHANGED
@@ -1,3 +1,5 @@
-Hi my name is Mohcine,
+Hi my name is Mohcine
 I am 25 years old
-I am a freelancer
+I am a freelancer
+I am interested in crypto
+I worked at EDF and Enedis