arslan-ahmed committed on
Commit
a0c907f
1 Parent(s): 49f0f1d

llama prompt

Browse files
Files changed (4) hide show
  1. app.py +9 -2
  2. requirements.txt +2 -1
  3. ttyd_consts.py +11 -0
  4. ttyd_functions.py +3 -1
app.py CHANGED
@@ -169,12 +169,18 @@ def updateQaChain(temp, k, modelNameDD, stdlQs, api_key_st, vsDict_st):
169
  # settingsUpdated = 'Settings updated:'+ ' Model=' + modelName + ', Temp=' + str(temp)+ ', k=' + str(k)
170
  # gr.Info(settingsUpdated)
171
 
 
 
 
 
 
172
  # Now create QA Chain using the LLM
173
  if stdlQs==0: # 0th index i.e. first option
174
  qa_chain_st = RetrievalQA.from_llm(
175
  llm=llm,
176
  retriever=vsDict_st['chromaClient'].as_retriever(search_type="similarity", search_kwargs={"k": int(k)}),
177
  return_source_documents=True,
 
178
  input_key = 'question', output_key='answer' # to align with ConversationalRetrievalChain for downstream functions
179
  )
180
  else:
@@ -184,7 +190,8 @@ def updateQaChain(temp, k, modelNameDD, stdlQs, api_key_st, vsDict_st):
184
  retriever=vsDict_st['chromaClient'].as_retriever(search_type="similarity", search_kwargs={"k": int(k)}),
185
  rephrase_question=rephQs,
186
  return_source_documents=True,
187
- return_generated_question=True
 
188
  )
189
 
190
  return qa_chain_st, model_dd.update(value=modelNameDD)
@@ -234,7 +241,7 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue='orange', secondary_hue='gray
234
  , info='Internal IBMers only')
235
  bamKey_btn = gr.Button("Submit BAM API Key")
236
  with gr.Row(visible=mode.uiAddDataVis):
237
- upload_fb = gr.Files(scale=5, label="Upload (multiple) Files - pdf/txt/docx supported", file_types=['.doc', '.docx', 'text', '.pdf', '.csv'])
238
  urls_tb = gr.Textbox(scale=5, label="Enter URLs starting with https (comma separated)"\
239
  , info=url_tb_info\
240
  , placeholder=url_tb_ph)
 
169
  # settingsUpdated = 'Settings updated:'+ ' Model=' + modelName + ', Temp=' + str(temp)+ ', k=' + str(k)
170
  # gr.Info(settingsUpdated)
171
 
172
+ if 'meta-llama/llama-2' in modelNameDD:
173
+ prompt = promptLlama
174
+ else:
175
+ prompt = None
176
+
177
  # Now create QA Chain using the LLM
178
  if stdlQs==0: # 0th index i.e. first option
179
  qa_chain_st = RetrievalQA.from_llm(
180
  llm=llm,
181
  retriever=vsDict_st['chromaClient'].as_retriever(search_type="similarity", search_kwargs={"k": int(k)}),
182
  return_source_documents=True,
183
+ prompt=prompt,
184
  input_key = 'question', output_key='answer' # to align with ConversationalRetrievalChain for downstream functions
185
  )
186
  else:
 
190
  retriever=vsDict_st['chromaClient'].as_retriever(search_type="similarity", search_kwargs={"k": int(k)}),
191
  rephrase_question=rephQs,
192
  return_source_documents=True,
193
+ return_generated_question=True,
194
+ combine_docs_chain_kwargs={'prompt':prompt}
195
  )
196
 
197
  return qa_chain_st, model_dd.update(value=modelNameDD)
 
241
  , info='Internal IBMers only')
242
  bamKey_btn = gr.Button("Submit BAM API Key")
243
  with gr.Row(visible=mode.uiAddDataVis):
244
+ upload_fb = gr.Files(scale=5, label="Upload (multiple) Files - pdf/txt/docx/pptx supported", file_types=['.doc', '.docx', 'text', '.pdf', '.csv', '.ppt', '.pptx'])
245
  urls_tb = gr.Textbox(scale=5, label="Enter URLs starting with https (comma separated)"\
246
  , info=url_tb_info\
247
  , placeholder=url_tb_ph)
requirements.txt CHANGED
@@ -10,4 +10,5 @@ gdown
10
  docx2txt
11
  sentence-transformers
12
  ibm-watson-machine-learning
13
- ibm-generative-ai
 
 
10
  docx2txt
11
  sentence-transformers
12
  ibm-watson-machine-learning
13
+ ibm-generative-ai
14
+ "unstructured[all-docs]"
ttyd_consts.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes
2
  import os
3
  from dotenv import load_dotenv
@@ -34,6 +35,16 @@ stdlQs_rb_choices = ['Retrieve relavant docs using original question, send orig
34
  , 'Retrieve relavant docs using standalone question, send standalone question to LLM']
35
 
36
 
 
 
 
 
 
 
 
 
 
 
37
  bam_models = sorted(['bigscience/bloom',
38
  'salesforce/codegen2-16b',
39
  'codellama/codellama-34b-instruct',
 
1
+ from langchain import PromptTemplate
2
  from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes
3
  import os
4
  from dotenv import load_dotenv
 
35
  , 'Retrieve relavant docs using standalone question, send standalone question to LLM']
36
 
37
 
38
+ llamaPromptTemplate = """
39
+ <s>[INST] <<SYS>>
40
+ Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
41
+ <</SYS>>
42
+ {context}
43
+ Question: {question} [/INST]
44
+ """
45
+
46
+ promptLlama=PromptTemplate(input_variables=['context', 'question'], template=llamaPromptTemplate)
47
+
48
  bam_models = sorted(['bigscience/bloom',
49
  'salesforce/codegen2-16b',
50
  'codellama/codellama-34b-instruct',
ttyd_functions.py CHANGED
@@ -10,7 +10,7 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
  from langchain.embeddings import SentenceTransformerEmbeddings
11
 
12
  import os
13
- from langchain.document_loaders import WebBaseLoader, TextLoader, Docx2txtLoader, PyMuPDFLoader
14
  from whatsapp_chat_custom import WhatsAppChatLoader # use this instead of from langchain.document_loaders import WhatsAppChatLoader
15
 
16
  from collections import deque
@@ -241,6 +241,8 @@ def ingestFiles(documents, files_list, prog=None):
241
  doc = Docx2txtLoader(fPath).load()
242
  elif 'WhatsApp Chat with' in fPath and fPath.endswith('.csv'): # Convert Whatsapp TXT files to CSV using https://whatstk.streamlit.app/
243
  doc = WhatsAppChatLoader(fPath).load()
 
 
244
  else:
245
  pass
246
 
 
10
  from langchain.embeddings import SentenceTransformerEmbeddings
11
 
12
  import os
13
+ from langchain.document_loaders import WebBaseLoader, TextLoader, Docx2txtLoader, PyMuPDFLoader, UnstructuredPowerPointLoader
14
  from whatsapp_chat_custom import WhatsAppChatLoader # use this instead of from langchain.document_loaders import WhatsAppChatLoader
15
 
16
  from collections import deque
 
241
  doc = Docx2txtLoader(fPath).load()
242
  elif 'WhatsApp Chat with' in fPath and fPath.endswith('.csv'): # Convert Whatsapp TXT files to CSV using https://whatstk.streamlit.app/
243
  doc = WhatsAppChatLoader(fPath).load()
244
+ elif fPath.endswith(('.ppt', '.pptx')):
245
+ doc = UnstructuredPowerPointLoader(fPath).load()
246
  else:
247
  pass
248