# gradio
import gradio as gr
import random
import time
# boto3 for S3 access
import boto3
from botocore import UNSIGNED
from botocore.client import Config
# access .env file
import os
from dotenv import load_dotenv
#from bs4 import BeautifulSoup
# HF libraries
from langchain.llms import HuggingFaceHub
from langchain.embeddings import HuggingFaceHubEmbeddings
# vectorstore
from langchain.vectorstores import Chroma
from langchain.vectorstores import FAISS
# retrieval chain
from langchain.chains import RetrievalQA
# prompt template
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
# logging
#import logging
import zipfile

# load .env variables
config = load_dotenv(".env")
HUGGINGFACEHUB_API_TOKEN = os.getenv('HUGGINGFACEHUB_API_TOKEN')
AWS_S3_LOCATION = os.getenv('AWS_S3_LOCATION')
AWS_S3_FILE = os.getenv('AWS_S3_FILE')
VS_DESTINATION = os.getenv('VS_DESTINATION')

# remote LLM served via the Hugging Face Hub inference API
llm = HuggingFaceHub(repo_id="HuggingFaceH4/zephyr-7b-beta", model_kwargs={
    "temperature": 0.1,
    "max_new_tokens": 1024,
    "repetition_penalty": 1.2,
    "streaming": True,
    "return_full_text": True
})

# embedding model used for both indexing and querying the vectorstore
model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
embeddings = HuggingFaceHubEmbeddings(repo_id=model_name)

# anonymous (unsigned) S3 client; works because the vectorstore bucket is publicly readable
s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))

## Chroma DB
s3.download_file(AWS_S3_LOCATION, AWS_S3_FILE, VS_DESTINATION)
db = Chroma(persist_directory="./vectorstore", embedding_function=embeddings)
db.get()  # touch the collection once so a broken download fails at startup

## FAISS DB
# s3.download_file('rad-rag-demos', 'vectorstores/faiss_db_ray.zip', './chroma_db/faiss_db_ray.zip')
# with zipfile.ZipFile('./chroma_db/faiss_db_ray.zip', 'r') as zip_ref:
#     zip_ref.extractall('./chroma_db/')
# FAISS_INDEX_PATH = './chroma_db/faiss_db_ray'
# db = FAISS.load_local(FAISS_INDEX_PATH, embeddings)

# maximal-marginal-relevance retrieval returns more diverse context chunks
retriever = db.as_retriever(search_type="mmr")  # , search_kwargs={'k': 5, 'fetch_k': 25}

template = """
You are the friendly documentation buddy Arti, who helps the Human in using RAY, the open-source unified framework for scaling AI and Python applications. \
Use the following context (delimited by <ctx></ctx>) and the chat history (delimited by <hs></hs>) to answer the question:
------
<ctx>
{context}
</ctx>
------
<hs>
{history}
</hs>
------
{question}
Answer:
"""
prompt = PromptTemplate(
    input_variables=["history", "context", "question"],
    template=template,
)
memory = ConversationBufferMemory(memory_key="history", input_key="question")

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    verbose=True,
    return_source_documents=True,
    chain_type_kwargs={
        "verbose": True,
        "memory": memory,
        "prompt": prompt,
    },
)


def add_text(history, text):
    # append the user turn as a mutable pair; bot() fills in the answer later
    history = history + [[text, None]]
    return history, ""


def bot(history):
    response = infer(history[-1][0], history)
    print(memory.buffer)  # debug: dump the accumulated chat history
    sources = [doc.metadata.get("source") for doc in response['source_documents']]
    src_list = '\n'.join(sources)
    print_this = response['result'] + "\n\n\n Sources: \n\n\n" + src_list

    # typewriter-style streaming (currently disabled):
    # history[-1][1] = ""
    # for character in print_this:
    #     history[-1][1] += character
    #     time.sleep(0.05)
    #     yield history
    history[-1][1] = print_this
    return history


def infer(question, history):
    # RetrievalQA only consumes "query"; the {history} slot in the prompt is
    # filled by the ConversationBufferMemory attached to the inner chain
    query = question
    result = qa({"query": query, "history": history, "question": question})
    return result


css = """
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""

title = """

<div style="text-align: center; max-width: 700px;">
    <h1>Chat with your Documentation</h1>
    <p style="text-align: center;">Chat with the documentation;<br />
    when everything is ready, you can start asking questions about the docs ;)</p>
</div>
""" with gr.Blocks(css=css) as demo: with gr.Column(elem_id="col-container"): gr.HTML(title) chatbot = gr.Chatbot([], elem_id="chatbot") clear = gr.Button("Clear") with gr.Row(): question = gr.Textbox(label="Question", placeholder="Type your question and hit Enter ") question.submit(add_text, [chatbot, question], [chatbot, question], queue=False).then( bot, chatbot, chatbot ) clear.click(lambda: None, None, chatbot, queue=False) demo.queue() demo.launch()