import gradio as gr
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.memory import ConversationBufferWindowMemory
from langchain.vectorstores import Chroma
from langchain import PromptTemplate, LLMChain
from transformers import AutoTokenizer, pipeline
from typing import Dict, Any
import torch
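

# RetrievalQA with return_source_documents=True returns two output keys
# ('result' and 'source_documents'), which LangChain's stock memory classes
# refuse to save. This subclass of the windowed buffer memory keeps only the
# answer text under a single 'response' key so memory can be attached to the
# retrieval chain below.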
class AnswerConversationBufferMemory(ConversationBufferWindowMemory):
    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
        return super().save_context(inputs, {'response': outputs['result']})


def clean_text(text):
    # split()/join collapses all runs of whitespace (including newlines)
    # into single spaces, so the answer comes back as one line of text
    cleaned_text = ' '.join(text.split())
    return cleaned_text


def chatbot_llm_response(llm_response):
    # Append the source URLs of the retrieved documents below the answer
    text = clean_text(llm_response['result']) + '\nSources:\n'
    for source in llm_response["source_documents"]:
        text += source.metadata['source'] + '\n'
    return text
model_name = "databricks/dolly-v2-3b" | |
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left") | |
generate_text = pipeline(model=model_name, | |
torch_dtype=torch.bfloat16, | |
trust_remote_code=True, | |
device_map="auto", | |
return_full_text=True, | |
max_new_tokens=256, | |
top_p=0.95, | |
top_k=50) | |
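
# Note: Dolly-v2 ships its own instruction-following pipeline code, pulled in
# via trust_remote_code=True; bfloat16 and device_map="auto" help the 3B model
# fit in limited memory. The left-padded tokenizer created above is not passed
# to pipeline(), so the pipeline loads its own tokenizer from the model repo.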

prompt = PromptTemplate(
    input_variables=["instruction"],
    template="{instruction}")

hf_pipeline = HuggingFacePipeline(pipeline=generate_text)
llm_chain = LLMChain(llm=hf_pipeline, prompt=prompt)
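# (llm_chain wraps the raw pipeline behind a pass-through prompt; it is not
# used by the retrieval chain below, presumably kept for ad-hoc direct prompting)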

# intfloat/e5-base-v2: ranked #2 for the Retrieval task (June 2023) among
# embedding models under ~500 MB
model_name = "intfloat/e5-base-v2"
hf = HuggingFaceEmbeddings(model_name=model_name)

# Load up Vector Database
persist_directory = 'db'
vectordb = Chroma(persist_directory=persist_directory,
                  embedding_function=hf)
vectordb.get()  # return value unused; sanity check that the persisted collection loads
retriever = vectordb.as_retriever(search_kwargs={'k': 3})
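
# The persisted 'db' index is built offline. A minimal sketch of how it could
# be produced with LangChain's sitemap loader and the same e5 embeddings
# (the sitemap URL and splitter settings below are assumptions, not taken
# from this repo):
#
#   from langchain.document_loaders import SitemapLoader
#   from langchain.text_splitter import RecursiveCharacterTextSplitter
#   docs = SitemapLoader("https://www.buildingcode.online/sitemap.xml").load()
#   chunks = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100).split_documents(docs)
#   Chroma.from_documents(chunks, hf, persist_directory=persist_directory).persist()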

# Configure Conversation Chain
memory = AnswerConversationBufferMemory(k=3)
qa_chain_with_memory = RetrievalQA.from_chain_type(llm=hf_pipeline,
                                                   chain_type="stuff",
                                                   retriever=retriever,
                                                   return_source_documents=True,
                                                   memory=memory)
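
# chain_type="stuff" puts the k=3 retrieved chunks directly into the prompt's
# {context} slot; the memory keeps only the last 3 question/answer exchanges.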

# try to set the tone
template = '''
You are the assistant to a tradesperson with knowledge of the Ontario Building Code. You provide specific details using the context given and the user's question.
If you don't know the answer, you truthfully say you don't know and don't try to make up an answer.
----------------
{context}
Question: {question}
Helpful Answer:'''
qa_chain_with_memory.combine_documents_chain.llm_chain.prompt.template = template
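# (RetrievalQA builds its prompt internally, so the custom template above is
# patched onto the underlying combine-documents LLM chain; it must keep the
# {context} and {question} variables that chain expects.)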
examples = ["What are the requirements for plumbing venting and drainage systems?", | |
"Summarize the electrical code regulation for wiring commercial buildings", | |
"Tell me the maximum allowable span for floor joists in residential construction", | |
"I'm looking for guidelines for fire assemblies and walls in tall buildings", | |
"What are the insulation requirements in new residential constructions?"] | |


def process_example(args):
    for x in generate(args):
        pass
    return x


def generate(instruction):
    response = qa_chain_with_memory(instruction)
    processed_response = chatbot_llm_response(response)
    result = ""
    for word in processed_response.split(" "):
        result += word + " "
        yield result
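
# Yielding the partially rebuilt string word by word gives a simple streaming
# effect in the Gradio output; process_example (above) drains the generator
# and returns only the final string, which is what gr.Examples needs.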


with gr.Blocks(analytics_enabled=False) as demo:
    with gr.Column():
        gr.Markdown("""# Dolly-Expert-Lite
Dolly-Expert-Lite is a bot for domain-specific question
answering, currently powered by the new Dolly-v2-3b open-source
model. It's expert systems in the era of LLMs!
## Building Code Expert
In this example deployment, Dolly-Expert-Lite retrieves
information via a vector database built from the
[Ontario (Canada) Building Code](https://www.buildingcode.online)
sitemap with LangChain's sitemap loader. For details on the original
Dolly v2 model, please refer to the
[model card](https://huggingface.co/databricks/dolly-v2-12b).
### Type in the box below and click to ask the expert!
"""
        )
        with gr.Row():
            with gr.Column(scale=3):
                instruction = gr.Textbox(placeholder="Enter your question here", label="Question", elem_id="q-input")
                with gr.Box():
                    gr.Markdown("**Answer**")
                    output = gr.Markdown(elem_id="q-output")
                submit = gr.Button("Generate", variant="primary")
                clear = gr.Button("Clear", variant="secondary")
                gr.Examples(
                    examples=examples,
                    inputs=[instruction],
                    cache_examples=False,
                    fn=process_example,
                    outputs=[output],
                )

    submit.click(generate, inputs=[instruction], outputs=[output])
    clear.click(lambda: None, [], [output])
    instruction.submit(generate, inputs=[instruction], outputs=[output])

demo.queue(concurrency_count=16).launch()