import gradio as gr

from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import CTransformers
from langchain_community.vectorstores import FAISS

# Location of the FAISS index, assumed to have been built beforehand by a
# separate ingestion step (loading PDFs, splitting, and embedding them).
DB_FAISS_PATH = 'vectorstore/db_faiss'

custom_prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know; don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""


def set_custom_prompt():
    """Build the prompt template that the QA chain fills with retrieved context."""
    prompt = PromptTemplate(template=custom_prompt_template,
                            input_variables=['context', 'question'])
    return prompt


def retrieval_qa_chain(llm, prompt, db):
    """Create a RetrievalQA chain that 'stuffs' the top-2 retrieved chunks into the prompt."""
    qa_chain = RetrievalQA.from_chain_type(llm=llm,
                                           chain_type='stuff',
                                           retriever=db.as_retriever(search_kwargs={'k': 2}),
                                           return_source_documents=True,
                                           chain_type_kwargs={'prompt': prompt})
    return qa_chain


def load_llm():
    """Load the quantized Llama 2 7B chat model via ctransformers (runs on CPU)."""
    llm = CTransformers(
        model="TheBloke/Llama-2-7B-Chat-GGML",
        model_type="llama",
        # The ctransformers wrapper takes generation settings in `config`,
        # not as top-level keyword arguments.
        config={'max_new_tokens': 512, 'temperature': 0.5}
    )
    return llm


def qa_bot():
    """Wire the embeddings, vector store, LLM, and prompt into a single QA chain."""
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
                                       model_kwargs={'device': 'cpu'})
    # Recent langchain_community releases require an explicit opt-in to
    # deserialize a locally pickled FAISS index.
    db = FAISS.load_local(DB_FAISS_PATH, embeddings,
                          allow_dangerous_deserialization=True)
    llm = load_llm()
    qa_prompt = set_custom_prompt()
    qa = retrieval_qa_chain(llm, qa_prompt, db)
    return qa


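# Build the chain once at import time so every chat message reuses the
# already-loaded model and index instead of reloading them per request.
chain = qa_bot()

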
def respond(message, history, system_message, max_tokens, temperature, top_p):
    # The extra UI controls (system message, sampling settings) are received
    # here but not forwarded to the model in this minimal example.
    response = chain.invoke({'query': message})
    # ChatInterface expects a string reply, not the full result dict.
    return response['result']


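# The additional_inputs below map, in order, onto respond()'s extra parameters
# (system_message, max_tokens, temperature, top_p).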
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


if __name__ == "__main__":
    demo.launch()