Spaces:

xangma
/

chat-pykg

Runtime error

App Files Files Community

xangma committed on Apr 16, 2023

Commit

572a6c9

•

1 Parent(s): df62f91

latest

Browse files

Files changed (3) hide show

app.py +65 -41
chain.py +62 -10
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -8,14 +8,12 @@ import string
 import sys
 from pathlib import Path
 import numpy as np
 import chromadb
 import gradio as gr
 from chromadb.config import Settings
 from langchain.docstore.document import Document
 from langchain.embeddings import HuggingFaceEmbeddings, OpenAIEmbeddings
 from langchain.vectorstores import Chroma
-from langchain.retrievers import SVMRetriever
 from chain import get_new_chain1
 from ingest import embedding_chooser, ingest_docs
 logging.basicConfig(stream=sys.stdout, level=logging.INFO)
@@ -97,14 +95,18 @@ def merge_collections(collection_load_names, vs_state, k_textbox, search_type_se
             #         merged_vectorstore.append(f.readlines())
     return merged_vectorstore
-def set_chain_up(openai_api_key, model_selector, k_textbox, search_type_selector, max_tokens_textbox, vectorstore_radio, vectorstore, agent):
     if not agent or type(agent) == str:
         if vectorstore != None:
             if model_selector in ["gpt-3.5-turbo", "gpt-4"]:
                 if openai_api_key:
                     os.environ["OPENAI_API_KEY"] = openai_api_key
                     qa_chain = get_new_chain1(vectorstore, vectorstore_radio, model_selector, k_textbox, search_type_selector, max_tokens_textbox)
                     os.environ["OPENAI_API_KEY"] = ""
                     return qa_chain
                 else:
                     return 'no_open_aikey'
@@ -197,39 +199,7 @@ with block:
     with gr.Tabs() as tabs:
         with gr.TabItem("Chat", id=0):
             with gr.Row():
-                openai_api_key_textbox = gr.Textbox(
-                    placeholder="Paste your OpenAI API key (sk-...)",
-                    show_label=False,
-                    lines=1,
-                    type="password",
-                )
-                model_selector = gr.Dropdown(
-                    choices=["gpt-3.5-turbo", "gpt-4", "other"],
-                    label="Model",
-                    show_label=True,
-                    value = "gpt-3.5-turbo"
-                )
-                k_textbox = gr.Textbox(
-                    placeholder="k: Number of search results to consider",
-                    label="Search Results k:",
-                    show_label=True,
-                    lines=1,
-                    value="20",
-                )
-                search_type_selector = gr.Dropdown(
-                    choices=["similarity", "mmr", "svm"],
-                    label="Search Type",
-                    show_label=True,
-                    value = "similarity"
-                )
-                max_tokens_textbox = gr.Textbox(
-                    placeholder="max_tokens: Maximum number of tokens to generate",
-                    label="max_tokens",
-                    show_label=True,
-                    lines=1,
-                    value="1000",
-                )
-            chatbot = gr.Chatbot()
             with gr.Row():
                 clear_btn = gr.Button("Clear Chat", variant="secondary").style(full_width=False)
                 message = gr.Textbox(
@@ -240,12 +210,66 @@ with block:
                 submit = gr.Button(value="Send").style(full_width=False)
             gr.Examples(
                 examples=[
-                    "What does this code do?",
                     "I want to change the chat-pykg app to have a log viewer, where the user can see what python is doing in the background. How could I do that?",
-                    "Hello, I want to allow chat-pykg to search the internet before answering, can you help me change the code to do that? Thanks.",
                 ],
                 inputs=message,
             )
             gr.HTML(
                 """
@@ -318,8 +342,8 @@ with block:
         debug_state.value = False
         radio_state = gr.State()
-        submit.click(set_chain_up, inputs=[openai_api_key_textbox, model_selector, k_textbox, search_type_selector, max_tokens_textbox, select_vectorstore_radio, vs_state, agent_state], outputs=[agent_state]).then(chat, inputs=[message, history_state, agent_state], outputs=[chatbot, history_state])
-        message.submit(set_chain_up, inputs=[openai_api_key_textbox, model_selector, k_textbox, search_type_selector, max_tokens_textbox, select_vectorstore_radio, vs_state, agent_state], outputs=[agent_state]).then(chat, inputs=[message, history_state, agent_state], outputs=[chatbot, history_state])
         load_collections_button.click(merge_collections, inputs=[collections_viewer, vs_state, k_textbox, search_type_selector, select_vectorstore_radio, select_embedding_radio], outputs=[vs_state])#.then(change_tab, None, tabs) #.then(set_chain_up, inputs=[openai_api_key_textbox, model_selector, k_textbox, max_tokens_textbox, vs_state, agent_state], outputs=[agent_state])
         make_collections_button.click(ingest_docs, inputs=[all_collections_state, all_collections_to_get, chunk_size_textbox, chunk_overlap_textbox, select_vectorstore_radio, select_embedding_radio, debug_state], outputs=[all_collections_state, all_collections_to_get], show_progress=True).then(update_checkboxgroup, inputs = [all_collections_state], outputs = [collections_viewer])
@@ -334,7 +358,7 @@ with block:
         select_vectorstore_radio.change(update_radio, inputs = select_vectorstore_radio, outputs = make_vectorstore_radio)
         # Whenever chain parameters change, destroy the agent.
-        input_list = [openai_api_key_textbox, model_selector, k_textbox, max_tokens_textbox, select_vectorstore_radio, make_embedding_radio]
         output_list = [agent_state]
         for input_item in input_list:
             input_item.change(

 import sys
 from pathlib import Path
 import numpy as np
 import chromadb
 import gradio as gr
 from chromadb.config import Settings
 from langchain.docstore.document import Document
 from langchain.embeddings import HuggingFaceEmbeddings, OpenAIEmbeddings
 from langchain.vectorstores import Chroma
 from chain import get_new_chain1
 from ingest import embedding_chooser, ingest_docs
 logging.basicConfig(stream=sys.stdout, level=logging.INFO)
             #         merged_vectorstore.append(f.readlines())
     return merged_vectorstore
+def set_chain_up(openai_api_key, google_api_key, google_cse_id, model_selector, k_textbox, search_type_selector, max_tokens_textbox, vectorstore_radio, vectorstore, agent):
     if not agent or type(agent) == str:
         if vectorstore != None:
             if model_selector in ["gpt-3.5-turbo", "gpt-4"]:
                 if openai_api_key:
                     os.environ["OPENAI_API_KEY"] = openai_api_key
+                    os.environ["GOOGLE_API_KEY"] = google_api_key
+                    os.environ["GOOGLE_CSE_ID"] = google_cse_id
                     qa_chain = get_new_chain1(vectorstore, vectorstore_radio, model_selector, k_textbox, search_type_selector, max_tokens_textbox)
                     os.environ["OPENAI_API_KEY"] = ""
+                    os.environ["GOOGLE_API_KEY"] = ""
+                    os.environ["GOOGLE_CSE_ID"] = ""
                     return qa_chain
                 else:
                     return 'no_open_aikey'
     with gr.Tabs() as tabs:
         with gr.TabItem("Chat", id=0):
             with gr.Row():
+                chatbot = gr.Chatbot()
             with gr.Row():
                 clear_btn = gr.Button("Clear Chat", variant="secondary").style(full_width=False)
                 message = gr.Textbox(
                 submit = gr.Button(value="Send").style(full_width=False)
             gr.Examples(
                 examples=[
                     "I want to change the chat-pykg app to have a log viewer, where the user can see what python is doing in the background. How could I do that?",
+                    "Hello, I want to allow chat-pykg to search google before answering. In the langchain docs it says you can use a tool to do this: from langchain.agents import load_tools\ntools = load_tools([“google-search”]). How would I need to change get_new_chain1 function to use tools when it needs to as well as searching the vectorstore? Thanks!",
+                    "Great, thanks. What if I want to add other tools in the future? Can you please change get_new_chain1 function to do that?"
                 ],
                 inputs=message,
             )
+            with gr.Row():
+                with gr.Column(scale=1):
+                    model_selector = gr.Dropdown(
+                        choices=["gpt-3.5-turbo", "gpt-4", "other"],
+                        label="Model",
+                        show_label=True,
+                        value = "gpt-4"
+                    )
+                    k_textbox = gr.Textbox(
+                        placeholder="k: Number of search results to consider",
+                        label="Search Results k:",
+                        show_label=True,
+                        lines=1,
+                        value="20",
+                    )
+                    search_type_selector = gr.Dropdown(
+                        choices=["similarity", "mmr", "svm"],
+                        label="Search Type",
+                        show_label=True,
+                        value = "similarity"
+                    )
+                    max_tokens_textbox = gr.Textbox(
+                        placeholder="max_tokens: Maximum number of tokens to generate",
+                        label="max_tokens",
+                        show_label=True,
+                        lines=1,
+                        value="500",
+                    )
+                with gr.Column(scale=1):
+                    gr.HTML("")
+                with gr.Column(scale=1):
+                    gr.HTML("")
+                with gr.Column(scale=1):
+                    openai_api_key_textbox = gr.Textbox(
+                        placeholder="Paste your OpenAI API key (sk-...)",
+                        show_label=True,
+                        lines=1,
+                        type="password",
+                        label="OpenAI API Key",
+                    )
+                    google_api_key_textbox = gr.Textbox(
+                        placeholder="Paste your Google API key (AIza...)",
+                        show_label=True,
+                        lines=1,
+                        type="password",
+                        label="Google API Key",
+                    )
+                    google_cse_id_textbox = gr.Textbox(
+                        placeholder="Paste your Google CSE ID (0123...)",
+                        show_label=True,
+                        lines=1,
+                        type="password",
+                        label="Google CSE ID",
+                    )
             gr.HTML(
                 """
         debug_state.value = False
         radio_state = gr.State()
+        submit.click(set_chain_up, inputs=[openai_api_key_textbox, google_api_key_textbox, google_cse_id_textbox, model_selector, k_textbox, search_type_selector, max_tokens_textbox, select_vectorstore_radio, vs_state, agent_state], outputs=[agent_state]).then(chat, inputs=[message, history_state, agent_state], outputs=[chatbot, history_state])
+        message.submit(set_chain_up, inputs=[openai_api_key_textbox, google_api_key_textbox, google_cse_id_textbox, model_selector, k_textbox, search_type_selector, max_tokens_textbox, select_vectorstore_radio, vs_state, agent_state], outputs=[agent_state]).then(chat, inputs=[message, history_state, agent_state], outputs=[chatbot, history_state])
         load_collections_button.click(merge_collections, inputs=[collections_viewer, vs_state, k_textbox, search_type_selector, select_vectorstore_radio, select_embedding_radio], outputs=[vs_state])#.then(change_tab, None, tabs) #.then(set_chain_up, inputs=[openai_api_key_textbox, model_selector, k_textbox, max_tokens_textbox, vs_state, agent_state], outputs=[agent_state])
         make_collections_button.click(ingest_docs, inputs=[all_collections_state, all_collections_to_get, chunk_size_textbox, chunk_overlap_textbox, select_vectorstore_radio, select_embedding_radio, debug_state], outputs=[all_collections_state, all_collections_to_get], show_progress=True).then(update_checkboxgroup, inputs = [all_collections_state], outputs = [collections_viewer])
         select_vectorstore_radio.change(update_radio, inputs = select_vectorstore_radio, outputs = make_vectorstore_radio)
         # Whenever chain parameters change, destroy the agent.
+        input_list = [openai_api_key_textbox, model_selector, k_textbox, search_type_selector, max_tokens_textbox, select_vectorstore_radio, make_embedding_radio]
         output_list = [agent_state]
         for input_item in input_list:
             input_item.change(

chain.py CHANGED Viewed

@@ -15,28 +15,70 @@ from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
 from langchain.chains.llm import LLMChain
 from langchain.schema import BaseLanguageModel, BaseRetriever, Document
 from langchain.prompts.prompt import PromptTemplate
 def get_new_chain1(vectorstore, vectorstore_radio, model_selector, k_textbox, search_type_selector, max_tokens_textbox) -> Chain:
     retriever = None
     if vectorstore_radio == 'Chroma':
         retriever = vectorstore.as_retriever(search_type=search_type_selector)
         retriever.search_kwargs = {"k":int(k_textbox)}
     if vectorstore_radio == 'raw':
         if search_type_selector == 'svm':
             retriever = SVMRetriever.from_texts(merged_vectorstore, embedding_function)
             retriever.k = int(k_textbox)
-    template = """You are called chat-pykg and are an AI assistant coded in python using langchain and gradio. You are very helpful for answering questions about various open source libraries.
-                You are given the following extracted parts of code and a question. Provide a conversational answer to the question.
-                Do NOT make up any hyperlinks that are not in the code.
                 If you don't know the answer, just say that you don't know, don't try to make up an answer.
-                Question: {question}
                 =========
-                {context}
                 =========
-                Answer in Markdown:"""
-    QA_PROMPT.template = template
     if model_selector in ['gpt-4', 'gpt-3.5-turbo']:
         llm = ChatOpenAI(client = None, temperature=0.7, model_name=model_selector)
         doc_chain_llm = ChatOpenAI(client = None, streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0.7, model_name=model_selector, max_tokens=int(max_tokens_textbox))
@@ -49,8 +91,18 @@ def get_new_chain1(vectorstore, vectorstore_radio, model_selector, k_textbox, se
     # memory = ConversationKGMemory(llm=llm, input_key="question", output_key="answer")
     memory = ConversationBufferWindowMemory(input_key="question", output_key="answer", k=5)
-    qa = ConversationalRetrievalChain(
-        retriever=retriever, memory=memory, combine_docs_chain=doc_chain, question_generator=question_generator, verbose=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
-    # qa._get_docs = _get_docs.__get__(qa, ConversationalRetrievalChain)
     return qa

 from langchain.chains.llm import LLMChain
 from langchain.schema import BaseLanguageModel, BaseRetriever, Document
 from langchain.prompts.prompt import PromptTemplate
+from langchain.utilities.google_serper import GoogleSerperAPIWrapper
+from langchain.utilities.google_search import GoogleSearchAPIWrapper
+from langchain.agents.self_ask_with_search.base import SelfAskWithSearchChain
+from langchain.agents.self_ask_with_search.prompt import PROMPT
+class ConversationalRetrievalChainWithGoogleSearch(ConversationalRetrievalChain):
+    """ConversationalRetrievalChain whose document lookup also queries Google Search.
+
+    Documents returned by the chain's retriever are combined with documents
+    built from Google Search results before being handed to the
+    combine-documents step.
+    """
+    # Search wrapper queried alongside the retriever; its ``k`` attribute is
+    # passed as the number of results to request.
+    google_search_tool: GoogleSearchAPIWrapper
+    def _get_docs(self, question: str, inputs: Dict[str, Any]) -> List[Document]:
+        """Return retriever documents followed by Google Search result documents.
+
+        Args:
+            question: Query sent to both the retriever and Google Search.
+            inputs: Chain inputs; not used by this implementation.
+
+        Returns:
+            The combined documents (retriever results first, then search
+            results) after ``self._reduce_tokens_below_limit`` has been applied.
+        """
+        # Get documents from the retriever
+        docs_from_retriever = self.retriever.get_relevant_documents(question)
+        # Get search results from Google Search
+        search_results = self.google_search_tool.results(question, num_results=self.google_search_tool.k)
+        # Create documents from the search results. Entries lacking "title" or
+        # "link" are skipped instead of raising KeyError.
+        # NOTE(review): the wrapper appears to return such an entry as a
+        # "no results" placeholder - confirm against the installed langchain version.
+        docs_from_search = [
+            Document(
+                page_content=result.get("snippet", ""),
+                metadata={"title": result["title"], "link": result["link"]},
+            )
+            for result in search_results
+            if "title" in result and "link" in result
+        ]
+        # Combine both lists of documents
+        docs = docs_from_retriever + docs_from_search
+        return self._reduce_tokens_below_limit(docs)
 def get_new_chain1(vectorstore, vectorstore_radio, model_selector, k_textbox, search_type_selector, max_tokens_textbox) -> Chain:
     retriever = None
     if vectorstore_radio == 'Chroma':
         retriever = vectorstore.as_retriever(search_type=search_type_selector)
         retriever.search_kwargs = {"k":int(k_textbox)}
+        if search_type_selector == 'mmr':
+            retriever.search_kwargs = {"k":int(k_textbox), "fetch_k":4*int(k_textbox)}
     if vectorstore_radio == 'raw':
         if search_type_selector == 'svm':
             retriever = SVMRetriever.from_texts(merged_vectorstore, embedding_function)
             retriever.k = int(k_textbox)
+    qa_template = """You are called chat-pykg and are an AI assistant coded in python using langchain and gradio. You are very helpful for answering questions about programming with various open source packages and libraries.
+                You are given snippets of code and information in the Context below, as well as a Question to give a Helpful answer to.
+                Due to data size limitations, the snippets of code in the Context have been specifically filtered/selected for their relevance from a document store containing code from one or many packages and libraries.
+                Each of the code snippets is marked with '# source: package/filename' so you can attempt to establish where they are located in their package structure and gain more understanding of the code.
+                Please provide a helpful answer in markdown to the Question.
+                Do not make up any hyperlinks that are not in the Context.
                 If you don't know the answer, just say that you don't know, don't try to make up an answer.
                 =========
+                Context:{context}
                 =========
+                Question: {question}
+                Helpful answer:"""
+    QA_PROMPT.template = qa_template
+    condense_question_template = """Given the following conversation and a Follow Up Input, rephrase the Follow Up Input to be a Standalone question.
+    The Standalone question will be used for retrieving relevant source code and information from a document store, where each document is marked with '# source: package/filename'.
+    Therefore, in your Standalone question you must try to include references to related code or sources that have been mentioned in the Follow Up Input or Chat History.
+    =========
+    Chat History:
+    {chat_history}
+    =========
+    Follow Up Input: {question}
+    Standalone question in markdown:"""
+    CONDENSE_QUESTION_PROMPT.template = condense_question_template
     if model_selector in ['gpt-4', 'gpt-3.5-turbo']:
         llm = ChatOpenAI(client = None, temperature=0.7, model_name=model_selector)
         doc_chain_llm = ChatOpenAI(client = None, streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True, temperature=0.7, model_name=model_selector, max_tokens=int(max_tokens_textbox))
     # memory = ConversationKGMemory(llm=llm, input_key="question", output_key="answer")
     memory = ConversationBufferWindowMemory(input_key="question", output_key="answer", k=5)
+    google_search_tool = GoogleSearchAPIWrapper(search_engine = "google", k = int(int(k_textbox)/2))
+    qa_orig = ConversationalRetrievalChain(
+        retriever=retriever, memory=memory, combine_docs_chain=doc_chain, question_generator=question_generator, verbose=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
+    qa_with_google_search = ConversationalRetrievalChainWithGoogleSearch(
+        retriever=retriever,
+        memory=memory,
+        combine_docs_chain=doc_chain,
+        question_generator=question_generator,
+        google_search_tool=google_search_tool,
+        verbose=True,
+        callback_manager=CallbackManager([StreamingStdOutCallbackHandler()])
+    )
+    qa = qa_orig
     return qa

requirements.txt CHANGED Viewed

@@ -7,4 +7,5 @@ transformers
 gradio
 chromadb
 sentence_transformers
-python-magic

 gradio
 chromadb
 sentence_transformers
+python-magic
+google-api-python-client