import gradio as gr
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.memory import ConversationBufferWindowMemory
from langchain.vectorstores import Chroma
from langchain import PromptTemplate, LLMChain
from transformers import AutoTokenizer, pipeline
from typing import Dict, Any
import torch
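

# RetrievalQA with return_source_documents=True returns two output keys
# ('result' and 'source_documents'), which LangChain's stock memory classes
# refuse to save. This subclass of the windowed buffer memory keeps only the
# answer text under a single 'response' key so memory can be attached to the
# retrieval chain below.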
class AnswerConversationBufferMemory(ConversationBufferWindowMemory):
    def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None:
        return super().save_context(inputs, {'response': outputs['result']})


def clean_text(text):
    # split()/join collapses all runs of whitespace (including newlines)
    # into single spaces, so the answer comes back as one line of text
    cleaned_text = ' '.join(text.split())
    return cleaned_text


def chatbot_llm_response(llm_response):
    # Append the source URLs of the retrieved documents below the answer
    text = clean_text(llm_response['result']) + '\nSources:\n'
    for source in llm_response["source_documents"]:
        text += source.metadata['source'] + '\n'
    return text
model_name = "databricks/dolly-v2-3b" | |
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left") | |
generate_text = pipeline(model=model_name, | |
torch_dtype=torch.bfloat16, | |
trust_remote_code=True, | |
device_map="auto", | |
return_full_text=True, | |
max_new_tokens=256, | |
top_p=0.95, | |
top_k=50) | |
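
# Note: Dolly-v2 ships its own instruction-following pipeline code, pulled in
# via trust_remote_code=True; bfloat16 and device_map="auto" help the 3B model
# fit in limited memory. The left-padded tokenizer created above is not passed
# to pipeline(), so the pipeline loads its own tokenizer from the model repo.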

prompt = PromptTemplate(
    input_variables=["instruction"],
    template="{instruction}")

hf_pipeline = HuggingFacePipeline(pipeline=generate_text)
llm_chain = LLMChain(llm=hf_pipeline, prompt=prompt)
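# (llm_chain wraps the raw pipeline behind a pass-through prompt; it is not
# used by the retrieval chain below, presumably kept for ad-hoc direct prompting)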

# intfloat/e5-base-v2: ranked #2 for the Retrieval task (June 2023) among
# embedding models under ~500 MB
model_name = "intfloat/e5-base-v2"
hf = HuggingFaceEmbeddings(model_name=model_name)

# Load up Vector Database
persist_directory = 'db'
vectordb = Chroma(persist_directory=persist_directory,
                  embedding_function=hf)
vectordb.get()  # return value unused; sanity check that the persisted collection loads
retriever = vectordb.as_retriever(search_kwargs={'k': 3})
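
# The persisted 'db' index is built offline. A minimal sketch of how it could
# be produced with LangChain's sitemap loader and the same e5 embeddings
# (the sitemap URL and splitter settings below are assumptions, not taken
# from this repo):
#
#   from langchain.document_loaders import SitemapLoader
#   from langchain.text_splitter import RecursiveCharacterTextSplitter
#   docs = SitemapLoader("https://www.buildingcode.online/sitemap.xml").load()
#   chunks = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100).split_documents(docs)
#   Chroma.from_documents(chunks, hf, persist_directory=persist_directory).persist()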

# Configure Conversation Chain
memory = AnswerConversationBufferMemory(k=3)
qa_chain_with_memory = RetrievalQA.from_chain_type(llm=hf_pipeline,
                                                   chain_type="stuff",
                                                   retriever=retriever,
                                                   return_source_documents=True,
                                                   memory=memory)
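
# chain_type="stuff" puts the k=3 retrieved chunks directly into the prompt's
# {context} slot; the memory keeps only the last 3 question/answer exchanges.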

# try to set the tone
template = '''
You are the assistant to a tradesperson with knowledge of the Ontario Building Code. You provide specific details using the context given and the user's question.
If you don't know the answer, you truthfully say you don't know and don't try to make up an answer.
----------------
{context}
Question: {question}
Helpful Answer:'''
qa_chain_with_memory.combine_documents_chain.llm_chain.prompt.template = template
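# (RetrievalQA builds its prompt internally, so the custom template above is
# patched onto the underlying combine-documents LLM chain; it must keep the
# {context} and {question} variables that chain expects.)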
examples = ["What are the requirements for plumbing venting and drainage systems?", | |
"Summarize the electrical code regulation for wiring commercial buildings", | |
"Tell me the maximum allowable span for floor joists in residential construction", | |
"I'm looking for guidelines for fire assemblies and walls in tall buildings", | |
"What are the insulation requirements in new residential constructions?"] | |


def process_example(args):
    for x in generate(args):
        pass
    return x


def generate(instruction):
    response = qa_chain_with_memory(instruction)
    processed_response = chatbot_llm_response(response)
    result = ""
    for word in processed_response.split(" "):
        result += word + " "
        yield result
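
# Yielding the partially rebuilt string word by word gives a simple streaming
# effect in the Gradio output; process_example (above) drains the generator
# and returns only the final string, which is what gr.Examples needs.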


with gr.Blocks(analytics_enabled=False) as demo:
    with gr.Column():
        gr.Markdown("""# Dolly-Expert-Lite
Dolly-Expert-Lite is a bot for domain-specific question
answering, currently powered by the new Dolly-v2-3b open-source
model. It's expert systems in the era of LLMs!
## Building Code Expert
In this example deployment, Dolly-Expert-Lite retrieves
information via a vector database built from the
[Ontario (Canada) Building Code](https://www.buildingcode.online)
sitemap with LangChain's sitemap loader. For details on the original
Dolly v2 model, please refer to the
[model card](https://huggingface.co/databricks/dolly-v2-12b).
### Type in the box below and click to ask the expert!
"""
        )
        with gr.Row():
            with gr.Column(scale=3):
                instruction = gr.Textbox(placeholder="Enter your question here", label="Question", elem_id="q-input")
                with gr.Box():
                    gr.Markdown("**Answer**")
                    output = gr.Markdown(elem_id="q-output")
                submit = gr.Button("Generate", variant="primary")
                clear = gr.Button("Clear", variant="secondary")
                gr.Examples(
                    examples=examples,
                    inputs=[instruction],
                    cache_examples=False,
                    fn=process_example,
                    outputs=[output],
                )

    submit.click(generate, inputs=[instruction], outputs=[output])
    clear.click(lambda: None, [], [output])
    instruction.submit(generate, inputs=[instruction], outputs=[output])

demo.queue(concurrency_count=16).launch()