Spaces:

arslan-ahmed
/

talk-to-your-docs

Running

App Files Community

arslan-ahmed committed on Sep 22, 2023

Commit

a0c907f

1 Parent(s): 49f0f1d

llama prompt

Browse files

Files changed (4) hide show

app.py +9 -2
requirements.txt +2 -1
ttyd_consts.py +11 -0
ttyd_functions.py +3 -1

app.py CHANGED Viewed

@@ -169,12 +169,18 @@ def updateQaChain(temp, k, modelNameDD, stdlQs, api_key_st, vsDict_st):
     # settingsUpdated = 'Settings updated:'+ ' Model=' + modelName + ', Temp=' + str(temp)+ ', k=' + str(k)
     # gr.Info(settingsUpdated)
     # Now create QA Chain using the LLM
     if stdlQs==0: # 0th index i.e. first option
         qa_chain_st = RetrievalQA.from_llm(
                     llm=llm,
                     retriever=vsDict_st['chromaClient'].as_retriever(search_type="similarity", search_kwargs={"k": int(k)}),
                     return_source_documents=True,
                     input_key = 'question', output_key='answer' # to align with ConversationalRetrievalChain for downstream functions
                 )
     else:
@@ -184,7 +190,8 @@ def updateQaChain(temp, k, modelNameDD, stdlQs, api_key_st, vsDict_st):
                     retriever=vsDict_st['chromaClient'].as_retriever(search_type="similarity", search_kwargs={"k": int(k)}),
                     rephrase_question=rephQs,
                     return_source_documents=True,
-                    return_generated_question=True
                 )
     return qa_chain_st, model_dd.update(value=modelNameDD)
@@ -234,7 +241,7 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue='orange', secondary_hue='gray
                             , info='Internal IBMers only')
                     bamKey_btn = gr.Button("Submit BAM API Key")
             with gr.Row(visible=mode.uiAddDataVis):
-                upload_fb = gr.Files(scale=5, label="Upload (multiple) Files - pdf/txt/docx supported", file_types=['.doc', '.docx', 'text', '.pdf', '.csv'])
                 urls_tb = gr.Textbox(scale=5, label="Enter URLs starting with https (comma separated)"\
                                     , info=url_tb_info\
                                     , placeholder=url_tb_ph)

     # settingsUpdated = 'Settings updated:'+ ' Model=' + modelName + ', Temp=' + str(temp)+ ', k=' + str(k)
     # gr.Info(settingsUpdated)
+    if 'meta-llama/llama-2' in modelNameDD:
+        prompt = promptLlama
+    else:
+        prompt = None
     # Now create QA Chain using the LLM
     if stdlQs==0: # 0th index i.e. first option
         qa_chain_st = RetrievalQA.from_llm(
                     llm=llm,
                     retriever=vsDict_st['chromaClient'].as_retriever(search_type="similarity", search_kwargs={"k": int(k)}),
                     return_source_documents=True,
+                    prompt=prompt,
                     input_key = 'question', output_key='answer' # to align with ConversationalRetrievalChain for downstream functions
                 )
     else:
                     retriever=vsDict_st['chromaClient'].as_retriever(search_type="similarity", search_kwargs={"k": int(k)}),
                     rephrase_question=rephQs,
                     return_source_documents=True,
+                    return_generated_question=True,
+                    combine_docs_chain_kwargs={'prompt':promptLlama}
                 )
     return qa_chain_st, model_dd.update(value=modelNameDD)
                             , info='Internal IBMers only')
                     bamKey_btn = gr.Button("Submit BAM API Key")
             with gr.Row(visible=mode.uiAddDataVis):
+                upload_fb = gr.Files(scale=5, label="Upload (multiple) Files - pdf/txt/docx supported", file_types=['.doc', '.docx', 'text', '.pdf', '.csv', '.ppt', '.pptx'])
                 urls_tb = gr.Textbox(scale=5, label="Enter URLs starting with https (comma separated)"\
                                     , info=url_tb_info\
                                     , placeholder=url_tb_ph)

requirements.txt CHANGED Viewed

@@ -10,4 +10,5 @@ gdown
 docx2txt
 sentence-transformers
 ibm-watson-machine-learning
-ibm-generative-ai

 docx2txt
 sentence-transformers
 ibm-watson-machine-learning
+ibm-generative-ai
+"unstructured[all-docs]"

ttyd_consts.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes
 import os
 from dotenv import load_dotenv
@@ -34,6 +35,16 @@ stdlQs_rb_choices =  ['Retrieve relavant docs using original question, send orig
                     , 'Retrieve relavant docs using standalone question, send standalone question to LLM']
 bam_models = sorted(['bigscience/bloom',
  'salesforce/codegen2-16b',
  'codellama/codellama-34b-instruct',

+from langchain import PromptTemplate
 from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes
 import os
 from dotenv import load_dotenv
                     , 'Retrieve relavant docs using standalone question, send standalone question to LLM']
+llamaPromptTemplate = """
+<s>[INST] <<SYS>>
+Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
+<</SYS>>
+{context}
+Question: {question} [/INST]
+"""
+promptLlama=PromptTemplate(input_variables=['context', 'question'], template=llamaPromptTemplate)
 bam_models = sorted(['bigscience/bloom',
  'salesforce/codegen2-16b',
  'codellama/codellama-34b-instruct',

ttyd_functions.py CHANGED Viewed

@@ -10,7 +10,7 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.embeddings import SentenceTransformerEmbeddings
 import os
-from langchain.document_loaders import WebBaseLoader, TextLoader, Docx2txtLoader, PyMuPDFLoader
 from whatsapp_chat_custom import WhatsAppChatLoader # use this instead of from langchain.document_loaders import WhatsAppChatLoader
 from collections import deque
@@ -241,6 +241,8 @@ def ingestFiles(documents, files_list, prog=None):
             doc = Docx2txtLoader(fPath).load()
         elif 'WhatsApp Chat with' in fPath and fPath.endswith('.csv'): # Convert Whatsapp TXT files to CSV using https://whatstk.streamlit.app/
             doc = WhatsAppChatLoader(fPath).load()
         else:
             pass

 from langchain.embeddings import SentenceTransformerEmbeddings
 import os
+from langchain.document_loaders import WebBaseLoader, TextLoader, Docx2txtLoader, PyMuPDFLoader, UnstructuredPowerPointLoader
 from whatsapp_chat_custom import WhatsAppChatLoader # use this instead of from langchain.document_loaders import WhatsAppChatLoader
 from collections import deque
             doc = Docx2txtLoader(fPath).load()
         elif 'WhatsApp Chat with' in fPath and fPath.endswith('.csv'): # Convert Whatsapp TXT files to CSV using https://whatstk.streamlit.app/
             doc = WhatsAppChatLoader(fPath).load()
+        elif fPath.endswith(('.ppt', '.pptx')):
+            doc = UnstructuredPowerPointLoader(fPath).load()
         else:
             pass