Upload 4 files
app.py
CHANGED
@@ -4,8 +4,7 @@ import os
 import gradio as gr
 import langchain
 import pickle
-from langchain.
-from langchain import OpenAI
+from langchain.llms import HuggingFaceHub
 
 from chain import get_new_chain1
 
@@ -15,23 +14,24 @@ def get_faiss_store():
     return faiss_store
 
 
-def set_openai_api_key(api_key, agent):
-    if api_key:
-        os.environ["OPENAI_API_KEY"] = api_key
-        vectorstore = get_faiss_store()
-
-        final_output_llm = OpenAI(model_name="text-davinci-003", temperature=0, max_tokens=-1)
-
-
-
+def load_model():
+    print(langchain.__file__)
+
+    vectorstore = get_faiss_store()
+
+    flan_ul = HuggingFaceHub(repo_id="google/flan-ul2",
+                             model_kwargs={"temperature":0.1, "max_new_tokens":200},
+                             huggingfacehub_api_token="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")
+
+    qa_chain = get_new_chain1(vectorstore, flan_ul, flan_ul, isFlan=True)
+    return qa_chain
 
 
 def chat(inp, history, agent):
     history = history or []
     if agent is None:
-        history.append((inp, "Please
+        history.append((inp, "Please click Load Model or wait for model to load"))
         return history, history
     print("\n==== date/time: " + str(datetime.datetime.now()) + " ====")
     print("inp: " + inp)
@@ -49,12 +49,10 @@ with block:
     with gr.Row():
         gr.Markdown("<h3><center>Hugging Face Doc Search</center></h3><p>Ask questions about the Hugging Face Transformers Library</p>")
 
-        openai_api_key_textbox = gr.Textbox(
-
-
-
-            type="password",
-        )
+        load_model_button = gr.Button(
+            value="Load Model",
+            variant="secondary"
+        ).style(full_width=False)
 
     chatbot = gr.Chatbot()
 
@@ -90,10 +88,6 @@ with block:
     submit.click(chat, inputs=[message, state, agent_state], outputs=[chatbot, state])
     message.submit(chat, inputs=[message, state, agent_state], outputs=[chatbot, state])
 
-    openai_api_key_textbox.change(
-        set_openai_api_key,
-        inputs=[openai_api_key_textbox, agent_state],
-        outputs=[agent_state],
-    )
+    load_model_button.click(load_model, outputs=[agent_state])
 
 block.launch(debug=True)
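Note on load_model(): the commit hardcodes a Hugging Face Hub API token in the source, where anyone who can read the Space's files can see it. A minimal sketch of the same call with the token pulled from the environment instead, assuming it is stored as a Space secret named HUGGINGFACEHUB_API_TOKEN (the variable langchain also falls back to when no token is passed explicitly):

import os

from langchain.llms import HuggingFaceHub

def load_model_from_env():
    # Hypothetical variant of load_model(): same model and kwargs as the
    # commit, but the token comes from a Space secret / environment variable.
    token = os.environ["HUGGINGFACEHUB_API_TOKEN"]
    return HuggingFaceHub(repo_id="google/flan-ul2",
                          model_kwargs={"temperature": 0.1, "max_new_tokens": 200},
                          huggingfacehub_api_token=token)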
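For reference, a self-contained sketch of the new button wiring, using the same component names as app.py with a stub in place of the real chain:

import gradio as gr

def load_model():
    return "stub-chain"  # stands in for get_new_chain1(vectorstore, flan_ul, flan_ul, isFlan=True)

with gr.Blocks() as block:
    agent_state = gr.State()  # None until Load Model is clicked, which is what chat() checks for
    load_model_button = gr.Button(value="Load Model", variant="secondary")
    load_model_button.click(load_model, outputs=[agent_state])

block.launch()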
chain.py
CHANGED
@@ -4,14 +4,12 @@ import pathlib
 import pickle
 from typing import Dict, List, Tuple
 
-import
-from langchain import OpenAI, PromptTemplate
+from langchain import PromptTemplate
 from langchain.chains import LLMChain
 from langchain.chains.base import Chain
 from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
 from langchain.chains.conversation.memory import ConversationBufferMemory
 from langchain.chains.question_answering import load_qa_chain
-from langchain.embeddings import OpenAIEmbeddings
 from langchain.prompts import FewShotPromptTemplate, PromptTemplate
 from langchain.prompts.example_selector import \
     SemanticSimilarityExampleSelector
@@ -42,15 +40,23 @@ class CustomChain(Chain, BaseModel):
         else:
             new_question = question
         print(new_question)
-        docs = self.vstore.similarity_search(new_question, k=
+        docs = self.vstore.similarity_search(new_question, k=3)
         new_inputs = inputs.copy()
         new_inputs["question"] = new_question
         new_inputs["chat_history"] = chat_history_str
         answer, _ = self.chain.combine_docs(docs, **new_inputs)
-        return {"answer": answer}
 
+        ## Dedupe source list
+        source_list = [doc.metadata['source'] for doc in docs]
+
+        source_string = "\n\n*Sources:* "
+        for i, source in enumerate(set(source_list)):
+            source_string += f"[[{i}](https://{source})]"
+
+        final_answer = answer + source_string
+        return {"answer": final_answer}
 
-def get_new_chain1(vectorstore, rephraser_llm, final_output_llm) -> Chain:
+
+def get_new_chain1(vectorstore, rephraser_llm, final_output_llm, isFlan) -> Chain:
     _eg_template = """## Example:
 
 Chat History:
@@ -73,7 +79,7 @@ def get_new_chain1(vectorstore, rephraser_llm, final_output_llm) -> Chain:
     #### LOAD VSTORE WITH REPHRASE EXAMPLES
     with open("rephrase_eg.pkl", 'rb') as f:
         rephrase_example_selector = pickle.load(f)
-
+
     prompt = FewShotPromptTemplate(
         prefix=_prefix,
         suffix=_suffix,
@@ -89,25 +95,38 @@ def get_new_chain1(vectorstore, rephraser_llm, final_output_llm) -> Chain:
         input_variables=["page_content", "source"],
     )
 
-
-You are given
-
-
+    gpt_template = """You are an AI assistant for the open source transformers library provided by Hugging Face. The documentation is located at https://huggingface.co/docs/transformers.
+- You are given extracted parts of a long document and a question.
+- Provide a conversational answer with a hyperlink to the documentation based on the "source".
+- Do NOT add .html to the end of links. Make sure to bold link text.
+- You should only use hyperlinks that are explicitly listed as a source in the context. Do NOT make up a hyperlink that is not listed.
+- If the question includes a request for code, provide a code block directly from the documentation.
+- If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
+- If the question is not about Hugging Face Transformers, politely inform them that you are tuned to only answer questions about Transformers.
+
 For example, if someone asks how to install Transformers, you should say:
 
-You can install with pip
+You can install with pip:
'''py
pip install transformers
'''
+**(Source)**[https://huggingface.co/docs/transformers/main/en/installation]
 
-If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
-If the question is not about Hugging Face Transformers, politely inform them that you are tuned to only answer questions about Transformers.
 Question: {question}
 =========
 {context}
 =========
 Answer in Markdown:"""
-
+
+    flan_template = """
+{context}
+Based on the above documentation, answer the user's question in markdown: {question}"""
+
+    PROMPT = PromptTemplate(template=gpt_template, input_variables=["question", "context"])
+
+    if isFlan:
+        PROMPT = PromptTemplate(template=flan_template, input_variables=["question", "context"])
+
     doc_chain = load_qa_chain(
         final_output_llm,
         chain_type="stuff",
@@ -120,8 +139,10 @@ Answer in Markdown:"""
 
 def _get_chat_history(chat_history: List[Tuple[str, str]]):
     buffer = ""
-    for human_s, ai_s in chat_history:
+    for human_s, ai_s in chat_history[-2:]:
         human = f"Human: " + human_s
         ai = f"Assistant: " + ai_s
         buffer += "\n" + "\n".join([human, ai])
+
+
     return buffer
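One quirk in the new sources footer: set() dedupes the source list but also discards retrieval order, so the numbering of the links is arbitrary. An order-preserving variant (a hypothetical helper, not part of the Space's code):

def format_sources(docs):
    # Build the same "*Sources:*" markdown footer as CustomChain._call(),
    # but number the links in first-seen order.
    seen = []
    for doc in docs:
        src = doc.metadata["source"]
        if src not in seen:
            seen.append(src)
    return "\n\n*Sources:* " + "".join(f"[[{i}](https://{src})]" for i, src in enumerate(seen))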
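The isFlan flag only decides which template string backs PROMPT. The same selection, factored into a helper (a sketch assuming the two template strings from get_new_chain1 are passed in):

from langchain.prompts import PromptTemplate

def select_prompt(gpt_template, flan_template, isFlan):
    # FLAN-UL2 gets the short instruction prompt; GPT-style models get the
    # long system prompt with the hyperlinking rules.
    template = flan_template if isFlan else gpt_template
    return PromptTemplate(template=template, input_variables=["question", "context"])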
ingest.py
CHANGED
@@ -4,9 +4,10 @@ from pathlib import Path
 from markdown import markdown
 
 import pickle
+import re
 from bs4 import BeautifulSoup
 from langchain.text_splitter import CharacterTextSplitter
-from langchain.embeddings import HuggingFaceInstructEmbeddings
+from langchain.embeddings import HuggingFaceInstructEmbeddings
 from langchain.vectorstores import FAISS
 from InstructorEmbedding import INSTRUCTOR
 
@@ -16,7 +17,9 @@ def clean_data(data):
     html = markdown(data)
     soup = BeautifulSoup(html, "html.parser")
     text = ''.join(soup.findAll(text=True))
-    return "\n".join([t for t in text.split("\n") if t])
+    cleaned_text = re.sub(r"<!--.*?-->", "", text, flags=re.DOTALL)
+    print(cleaned_text)
+    return "\n".join([t for t in cleaned_text.split("\n") if t])
 
 docs = []
 metadatas = []
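A usage sketch of the updated clean_data(), with the debug print dropped:

import re

from bs4 import BeautifulSoup
from markdown import markdown

def clean_data(data):
    # Render markdown to HTML, pull out the text nodes, strip leftover
    # HTML comments, and drop blank lines (same steps as ingest.py).
    html = markdown(data)
    soup = BeautifulSoup(html, "html.parser")
    text = ''.join(soup.findAll(text=True))
    cleaned_text = re.sub(r"<!--.*?-->", "", text, flags=re.DOTALL)
    return "\n".join([t for t in cleaned_text.split("\n") if t])

print(clean_data("# Install\n\nRun `pip install transformers` to get started."))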