enoreyes committed
Commit a56c9af
Parent: fa8c8ef

Upload 4 files

Files changed (3)
  1. app.py +17 -23
  2. chain.py +37 -16
  3. ingest.py +5 -2
app.py CHANGED
@@ -4,8 +4,7 @@ import os
 import gradio as gr
 import langchain
 import pickle
-from langchain.vectorstores import Weaviate
-from langchain import OpenAI
+from langchain.llms import HuggingFaceHub

 from chain import get_new_chain1

@@ -15,23 +14,24 @@ def get_faiss_store():
     return faiss_store


-def set_openai_api_key(api_key, agent):
-    if api_key:
-        os.environ["OPENAI_API_KEY"] = api_key
-        vectorstore = get_faiss_store()
+def load_model():

-        rephraser_llm = OpenAI(model_name="text-davinci-003", temperature=0)
-        final_output_llm = OpenAI(model_name="text-davinci-003", temperature=0, max_tokens=-1)
+    print(langchain.__file__)

-        qa_chain = get_new_chain1(vectorstore, rephraser_llm, final_output_llm)
-        os.environ["OPENAI_API_KEY"] = ""
-        return qa_chain
+    vectorstore = get_faiss_store()
+
+    flan_ul = HuggingFaceHub(repo_id="google/flan-ul2",
+                             model_kwargs={"temperature":0.1, "max_new_tokens":200},
+                             huggingfacehub_api_token="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")
+
+    qa_chain = get_new_chain1(vectorstore, flan_ul, flan_ul, isFlan=True)
+    return qa_chain


 def chat(inp, history, agent):
     history = history or []
     if agent is None:
-        history.append((inp, "Please paste your OpenAI key to use"))
+        history.append((inp, "Please click Load Model or wait for model to load"))
         return history, history
     print("\n==== date/time: " + str(datetime.datetime.now()) + " ====")
     print("inp: " + inp)
@@ -49,12 +49,10 @@ with block:
     with gr.Row():
         gr.Markdown("<h3><center>Hugging Face Doc Search</center></h3><p>Ask questions about the Hugging Face Transformers Library</p>")

-    openai_api_key_textbox = gr.Textbox(
-        placeholder="Paste your OpenAI API key (sk-...)",
-        show_label=False,
-        lines=1,
-        type="password",
-    )
+    load_model_button = gr.Button(
+        value="Load Model",
+        variant="secondary"
+    ).style(full_width=False)

     chatbot = gr.Chatbot()

@@ -90,10 +88,6 @@ with block:
     submit.click(chat, inputs=[message, state, agent_state], outputs=[chatbot, state])
     message.submit(chat, inputs=[message, state, agent_state], outputs=[chatbot, state])

-    openai_api_key_textbox.change(
-        set_openai_api_key,
-        inputs=[openai_api_key_textbox, agent_state],
-        outputs=[agent_state],
-    )
+    load_model_button.click(load_model, outputs=[agent_state])

block.launch(debug=True)
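
The wiring above replaces the textbox `change` event with a button `click`: `load_model` takes no inputs, and its return value is cached in `agent_state`, which `chat` later receives. A minimal self-contained sketch of that pattern (helper bodies are stand-ins, not this app's full UI):

```python
import gradio as gr

def load_model():
    # Stand-in for the real loader; whatever is returned is cached in agent_state.
    return "loaded-chain"

def chat(inp, history, agent):
    history = history or []
    if agent is None:
        history.append((inp, "Please click Load Model or wait for model to load"))
    else:
        history.append((inp, f"agent ready: {agent}"))
    return history, history

with gr.Blocks() as block:
    chatbot = gr.Chatbot()
    message = gr.Textbox()
    state = gr.State()        # chat history
    agent_state = gr.State()  # cached chain, set by the button below
    load_model_button = gr.Button("Load Model")

    load_model_button.click(load_model, outputs=[agent_state])
    message.submit(chat, inputs=[message, state, agent_state], outputs=[chatbot, state])

block.launch()
```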
 
chain.py CHANGED
@@ -4,14 +4,12 @@ import pathlib
 import pickle
 from typing import Dict, List, Tuple

-import weaviate
-from langchain import OpenAI, PromptTemplate
+from langchain import PromptTemplate
 from langchain.chains import LLMChain
 from langchain.chains.base import Chain
 from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
 from langchain.chains.conversation.memory import ConversationBufferMemory
 from langchain.chains.question_answering import load_qa_chain
-from langchain.embeddings import OpenAIEmbeddings
 from langchain.prompts import FewShotPromptTemplate, PromptTemplate
 from langchain.prompts.example_selector import \
     SemanticSimilarityExampleSelector
@@ -42,15 +40,23 @@ class CustomChain(Chain, BaseModel):
         else:
             new_question = question
         print(new_question)
-        docs = self.vstore.similarity_search(new_question, k=4)
+        docs = self.vstore.similarity_search(new_question, k=3)
         new_inputs = inputs.copy()
         new_inputs["question"] = new_question
         new_inputs["chat_history"] = chat_history_str
         answer, _ = self.chain.combine_docs(docs, **new_inputs)
-        return {"answer": answer}

+        ## Dedupe source list
+        source_list = [doc.metadata['source'] for doc in docs]

+        source_string = "\n\n*Sources:* "
+        for i, source in enumerate(set(source_list)):
+            source_string += f"[[{i}](https://{source})]"
+
+        final_answer = answer + source_string
+        return {"answer": final_answer}
+
-def get_new_chain1(vectorstore, rephraser_llm, final_output_llm) -> Chain:
+def get_new_chain1(vectorstore, rephraser_llm, final_output_llm, isFlan) -> Chain:
     _eg_template = """## Example:

     Chat History:
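
The new block above appends a deduplicated `*Sources:*` footer built from each retrieved document's `source` metadata. A standalone sketch of the same logic (the `make_source_footer` helper name is hypothetical); note that iterating a bare `set()` gives arbitrary link numbering, so sorting first makes it deterministic:

```python
# `docs` items only need a .metadata["source"] field, as langchain Documents have.
def make_source_footer(docs) -> str:
    source_list = [doc.metadata["source"] for doc in docs]
    footer = "\n\n*Sources:* "
    # sorted() keeps the [0], [1], ... numbering stable across runs
    for i, source in enumerate(sorted(set(source_list))):
        footer += f"[[{i}](https://{source})]"
    return footer

class Doc:  # stand-in for langchain's Document class
    def __init__(self, source):
        self.metadata = {"source": source}

docs = [Doc("huggingface.co/docs/transformers/installation")] * 2
print(make_source_footer(docs))
# *Sources:* [[0](https://huggingface.co/docs/transformers/installation)]
```
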
@@ -73,7 +79,7 @@ def get_new_chain1(vectorstore, rephraser_llm, final_output_llm) -> Chain:
     #### LOAD VSTORE WITH REPHRASE EXAMPLES
     with open("rephrase_eg.pkl", 'rb') as f:
         rephrase_example_selector = pickle.load(f)
-
+
     prompt = FewShotPromptTemplate(
         prefix=_prefix,
         suffix=_suffix,
@@ -89,25 +95,38 @@ def get_new_chain1(vectorstore, rephraser_llm, final_output_llm) -> Chain:
         input_variables=["page_content", "source"],
     )

-    template = """You are an AI assistant for the open source transformers library provided by Hugging Face. The documentation is located at https://huggingface.co/docs/transformers.
-You are given the following extracted parts of a long document and a question. Provide a conversational answer with a hyperlink to the documentation. Do NOT add .html to the end of links. Make sure to bold link text.
-You should only use hyperlinks that are explicitly listed as a source in the context. Do NOT make up a hyperlink that is not listed.
-If the question includes a request for code, provide a code block directly from the documentation.
+    gpt_template = """You are an AI assistant for the open source transformers library provided by Hugging Face. The documentation is located at https://huggingface.co/docs/transformers.
+- You are given extracted parts of a long document and a question.
+- Provide a conversational answer with a hyperlink to the documentation based on the "source".
+- Do NOT add .html to the end of links. Make sure to bold link text.
+- You should only use hyperlinks that are explicitly listed as a source in the context. Do NOT make up a hyperlink that is not listed.
+- If the question includes a request for code, provide a code block directly from the documentation.
+- If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
+- If the question is not about Hugging Face Transformers, politely inform them that you are tuned to only answer questions about Transformers.
+
 For example, if someone asks how to install Transformers, you should say:

-You can install with pip, for more info view the **(documentation)**[https://huggingface.co/docs/transformers/installation]
+You can install with pip:
 '''py
 pip install transformers
 '''
+**(Source)**[https://huggingface.co/docs/transformers/main/en/installation]

-If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
-If the question is not about Hugging Face Transformers, politely inform them that you are tuned to only answer questions about Transformers.
 Question: {question}
 =========
 {context}
 =========
 Answer in Markdown:"""
-    PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])
+
+    flan_template = """
+{context}
+Based on the above documentation, answer the user's question in markdown: {question}"""
+
+    PROMPT = PromptTemplate(template=gpt_template, input_variables=["question", "context"])
+
+    if isFlan:
+        PROMPT = PromptTemplate(template=flan_template, input_variables=["question", "context"])
+
     doc_chain = load_qa_chain(
         final_output_llm,
         chain_type="stuff",
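
`get_new_chain1` now carries an `isFlan` flag that swaps the long GPT-style instruction template for a much shorter one, since flan-ul2 leaves little context room for instructions plus retrieved docs. A sketch of the same selection expressed as a lookup (the dict structure and shortened GPT template are illustrative, not in the commit):

```python
from langchain.prompts import PromptTemplate

FLAN_TEMPLATE = """
{context}
Based on the above documentation, answer the user's question in markdown: {question}"""

# Abbreviated stand-in for the long instruction template shown in the diff.
GPT_TEMPLATE = """You are an AI assistant for the transformers library.
{context}
Question: {question}
Answer in Markdown:"""

def pick_prompt(is_flan: bool) -> PromptTemplate:
    template = FLAN_TEMPLATE if is_flan else GPT_TEMPLATE
    return PromptTemplate(template=template, input_variables=["question", "context"])
```
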
@@ -120,8 +139,10 @@ Answer in Markdown:"""

 def _get_chat_history(chat_history: List[Tuple[str, str]]):
     buffer = ""
-    for human_s, ai_s in chat_history:
+    for human_s, ai_s in chat_history[-2:]:
         human = f"Human: " + human_s
         ai = f"Assistant: " + ai_s
         buffer += "\n" + "\n".join([human, ai])
+
+
     return buffer
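
Truncating to `chat_history[-2:]` bounds the rephraser prompt: only the two most recent (human, assistant) turns are serialized, which matters for a small-context model like flan-ul2. For illustration, the resulting behavior:

```python
from typing import List, Tuple

def _get_chat_history(chat_history: List[Tuple[str, str]]) -> str:
    buffer = ""
    for human_s, ai_s in chat_history[-2:]:  # keep only the last two turns
        buffer += "\n" + "\n".join([f"Human: {human_s}", f"Assistant: {ai_s}"])
    return buffer

turns = [("q1", "a1"), ("q2", "a2"), ("q3", "a3")]
print(_get_chat_history(turns))  # only q2/a2 and q3/a3 appear
```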
 
ingest.py CHANGED
@@ -4,9 +4,10 @@ from pathlib import Path
 from markdown import markdown

 import pickle
+import re
 from bs4 import BeautifulSoup
 from langchain.text_splitter import CharacterTextSplitter
-from langchain.embeddings import HuggingFaceInstructEmbeddings, OpenAIEmbeddings
+from langchain.embeddings import HuggingFaceInstructEmbeddings
 from langchain.vectorstores import FAISS
 from InstructorEmbedding import INSTRUCTOR
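
With `OpenAIEmbeddings` dropped, ingestion relies solely on the local instructor model. This hunk only touches imports; as a rough sketch of how a FAISS store is typically built and pickled with these imports (the model name, sample data, and output filename are all assumptions, not shown in the diff):

```python
import pickle
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS

# model_name is an assumption; instructor-large is a common choice for this class.
embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-large")
docs = ["Install with pip install transformers."]
metadatas = [{"source": "huggingface.co/docs/transformers/installation"}]

faiss_store = FAISS.from_texts(docs, embeddings, metadatas=metadatas)
with open("docs.pkl", "wb") as f:  # filename is an assumption
    pickle.dump(faiss_store, f)
```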
 
@@ -16,7 +17,9 @@ def clean_data(data):
     html = markdown(data)
     soup = BeautifulSoup(html, "html.parser")
     text = ''.join(soup.findAll(text=True))
-    return "\n".join([t for t in text.split("\n") if t])
+    cleaned_text = re.sub(r"<!--.*?-->", "", text, flags=re.DOTALL)
+    print(cleaned_text)
+    return "\n".join([t for t in cleaned_text.split("\n") if t])

 docs = []
 metadatas = []
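
The new `re.sub` strips `<!-- ... -->` comment blocks (Hugging Face doc sources open with a multi-line copyright comment) that can survive the Markdown-to-HTML-to-text round trip; `re.DOTALL` lets `.` match across newlines so the whole block is removed at once. The full function, for reference, minus the debug `print` left in the commit:

```python
import re
from bs4 import BeautifulSoup
from markdown import markdown

def clean_data(data: str) -> str:
    html = markdown(data)
    soup = BeautifulSoup(html, "html.parser")
    text = "".join(soup.findAll(text=True))
    # DOTALL lets the pattern span multi-line comment blocks
    cleaned_text = re.sub(r"<!--.*?-->", "", text, flags=re.DOTALL)
    return "\n".join(t for t in cleaned_text.split("\n") if t)
```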
 